from AddData import add_player_understat_data, add_team_understat_data, add_opp_understat_data
from collections import Counter
import datetime as dt
from GetClubELOData import add_elo
from GetFPLData import load_fpl_data
import GetUnderstatData as getus
import matplotlib.pyplot as plt
from matplotlib.ticker import FormatStrFormatter
import numpy as np
import os
import pandas as pd
import pytz
from scipy import stats
from sklearn.cluster import KMeans
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import silhouette_score
import statsmodels.api as sm
import statsmodels.tools as st
import statsmodels.graphics.api as smg
The first thing to do here is to get the data that we will be using in the model. The data will come from three sources:
data_dir = "C:/Users/jonat/OneDrive/Documents/Projects/Fantasy Premier League/Data"
seasons = ["{}-{}".format(y, str(y + 1)[2:4]) for y in range(2021, 2023)]
# Find the columns in the FPL data that exist for all seasons of the data
initial = True
for season in seasons:
fpl_data = load_fpl_data(season)
if initial:
columns = fpl_data.columns
columns = columns.intersection(fpl_data.columns)
initial = False
# Create set of FPL data
print("\nLoading FPL data")
fpl_df = pd.DataFrame()
for season in seasons:
fpl_df1 = load_fpl_data(season)
fpl_df = pd.concat([fpl_df, fpl_df1[columns]], ignore_index=True)
# Only select data for goalkeepers
fpl_df = fpl_df.loc[fpl_df['element_type'] == 1, :]
Loading FPL data
# Add Understat data to the dataset
print("\nAdding player Understat data")
fpl_df = add_player_understat_data(fpl_df)
# Add team Understat data to the dataframe
print("\nAdding team Understat data")
fpl_df = add_team_understat_data(fpl_df)
# Add team and opposition Elo scores to the dataframe
print("\nAdding ClubELO scores")
fpl_df = add_elo(fpl_df)
Adding player Understat data 2021-22 2022-23 Adding team Understat data 2021-22 2022-23 Adding ClubELO scores
# Make "was_home" numeric
fpl_df['was_home'] = fpl_df.was_home.astype("int")
# Add zeros instead of NaNs in player Understat statistics where the player played zero minutes
player_us_stats = ["shots", "xG", "position", "xA", "key_passes", "npg", "npxG", "xGChain", "xGBuildup"]
fpl_df.loc[fpl_df.minutes == 0, player_us_stats] = 0
# Get player and team lagged statistics
player_lag_stats = ['assists', 'bonus', 'bps', 'clean_sheets', 'creativity',
'goals_conceded', 'goals_scored', 'ict_index', 'influence',
'minutes', 'own_goals', 'penalties_missed', 'penalties_saved',
'red_cards', 'saves', 'selected', 'team_a_score',
'team_h_score', 'threat', 'total_points', 'transfers_in',
'transfers_out', 'value', 'was_home', 'yellow_cards',
'starts', 'shots', 'xG', 'xA', 'key_passes', 'npg', 'npxG',
'xGChain', 'xGBuildup']
team_lag_stats = ['team_xG', 'team_xGA', 'team_npxG', 'team_npxGA', 'team_deep',
'team_deep_allowed', 'team_scored', 'team_missed', 'team_xpts',
'team_wins', 'team_draws', 'team_loses', 'team_ppda_att',
'team_ppda_def', 'team_ppda_allowed_att', 'team_ppda_allowed_def']
lag_stats = player_lag_stats + team_lag_stats
max_lag = 4
lags = range(1, max_lag+1)
for lag in lags:
lag_colnames = [colname + "_{}".format(lag) for colname in lag_stats]
fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby(
['season_x', 'element'])[lag_stats].shift(lag)
C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby( C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3381905693.py:4: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()` fpl_df[lag_colnames] = fpl_df.sort_values(by='kickoff_date').groupby(
# Add opposition lagged statistics
opp_df = fpl_df[['kickoff_date', 'opp_team_name']].drop_duplicates()
opp_keys = zip(opp_df.kickoff_date, opp_df.opp_team_name)
team_lagged_colnames = [stat + "_{}".format(lag) for lag in lags for stat in team_lag_stats]
opp_lagged_colnames = [colname.replace("team", "opp") for colname in team_lagged_colnames]
for date, opp_name in opp_keys:
opp_lagged_stats = fpl_df.loc[((fpl_df.kickoff_date == date) &
(fpl_df.team_x == opp_name)),
team_lagged_colnames].drop_duplicates()
# Select row with fewest null values
min_na_idx = opp_lagged_stats.isna().astype(int).sum(axis=1).idxmin()
fpl_df.loc[((fpl_df.kickoff_date == date) & (fpl_df.opp_team_name == opp_name)),
opp_lagged_colnames] = opp_lagged_stats.loc[min_na_idx].tolist()
fpl_df['has_na'] = fpl_df.isna().any(axis=1)
row_hasna = fpl_df.has_na.value_counts()
print("Rows containing null values: {}".format(row_hasna[True]))
print("Rows not containing null values: {}".format(row_hasna[False]))
print("{:.1f}% of rows contain null values".format(100*row_hasna[True]/row_hasna.sum()))
Rows containing null values: 668 Rows not containing null values: 5033 11.7% of rows contain null values
model_df = fpl_df.copy().dropna(axis=0)
excl_df = fpl_df.drop(model_df.index, axis=0)
starters = fpl_df.loc[fpl_df.starts == 1].index
non_starters = fpl_df.loc[fpl_df.starts == 0].index
We are interested in examining the difference in points distribution between starting players and substitutes. The figure below shows clearly that there are significant differences between the distributions of points for starters and non-starters.
# Look at the difference in returns between starters and non-starters
tp_s = fpl_df.loc[starters, "total_points"].value_counts()
tp_s = tp_s/tp_s.sum()
tp_ns = fpl_df.loc[non_starters, "total_points"].value_counts()
tp_ns = tp_ns/tp_ns.sum()
fig, ax = plt.subplots()
ax.bar(tp_s.index, tp_s.values, alpha=0.4, label="Starters")
ax.bar(tp_ns.index, tp_ns.values, alpha=0.4, label="Non-starters", color='r')
ax.set_xlabel("Total points")
ax.set_ylabel("Probability")
ax.legend()
plt.show()
The summary statistics for the distributions are shown in the table below.
| Starters | Non-starters | |
|---|---|---|
| Mean | 3.608 | 0.005 |
| Variance | 8.190 | 0.020 |
In light of such differing distributions for starters and non-starters, we aim to predict the points scored by a player given that they start in a match. Then, it will be a question of assigning a probability to the chance that a player starts a match in order to determine their expected score. This will not currently be within the scope of this project although
s_mean = sum(tp_s.index * tp_s.values)
ns_mean = sum(tp_ns.index * tp_ns.values)
s_var = sum(tp_s.values * (tp_s.index**2)) - s_mean**2
ns_var = sum(tp_ns.values * ((tp_ns.index - ns_mean)**2))
print("Mean for starters = {:.3f}".format(s_mean))
print("Mean for non-starters = {:.3f}".format(ns_mean))
print("Variance for starters = {:.3f}".format(s_var))
print("Variance for non-starters = {:.3f}".format(ns_var))
Mean for starters = 3.608 Mean for non-starters = 0.005 Variance for starters = 8.190 Variance for non-starters = 0.020
# Create a violin plot of total_points returns for starters and non-starters
fig, ax = plt.subplots()
ax.violinplot([fpl_df.loc[starters, 'total_points'], fpl_df.loc[non_starters, 'total_points']],
showmedians=True)
ax.set_xticks([1, 2], labels=["Starters", "Non-starters"])
ax.set_ylabel("Total points")
plt.show()
# Compare the percentages of starters in the included and excluded data
starts_inc = model_df['starts'].value_counts()
starts_inc = starts_inc/starts_inc.sum()
starts_exc = excl_df['starts'].value_counts()
starts_exc = starts_exc/starts_exc.sum()
bar_width = 0.18
x_inc = [1 - 0.5*bar_width, 2 - 0.5*bar_width]
x_exc = [1 + 0.5*bar_width, 2 + 0.5*bar_width]
fig, ax = plt.subplots()
ax.bar(x_inc, starts_inc.values, width=bar_width, color='b', alpha=0.6, label="Included data")
ax.bar(x_exc, starts_exc.values, width=bar_width, color='r', alpha=0.6, label="Excluded data")
ax.set_xticks([1, 2], ["Non-starters", "Starters"])
ax.set_ylabel("Probability")
ax.legend()
plt.show()
# Drop players who did not start from the model dataframe
model_df.drop(model_df.loc[model_df.starts == 0].index, axis=0, inplace=True)
# Create new features
def create_feature_cols(df):
df1 = df.drop_duplicates(["element"])
df1['transfers_in_pct'] = 100*df1.transfers_in/df1.transfers_in.sum()
df1['transfers_out_pct'] = 100*df1.transfers_out/df1.transfers_out.sum()
df1['transfers_balance_rank'] = df1.transfers_balance.rank()
df = df.merge(df1[['element', 'transfers_in_pct', 'transfers_out_pct', 'transfers_balance_rank']],
how='left',
on='element')
return df[['element', 'transfers_in_pct', 'transfers_out_pct', 'transfers_balance_rank']]
new_features_df = model_df.groupby(["season_x", "round"]).apply(create_feature_cols)
new_features_df.index = new_features_df.index.droplevel(2)
new_features_df.reset_index(inplace=True)
model_df = pd.merge(model_df, new_features_df)
model_df.drop_duplicates(inplace=True)
# Get some response vs regressor plots and add new features
model_df['clean_sheet_ratio'] = model_df[["clean_sheets_{}".format(x) for x in range(1, 5)]].mean(axis=1)
model_df['opp_xG_avg'] = model_df[["opp_xG_{}".format(x) for x in range(1, 5)]].mean(axis=1)
model_df['opp_G_avg'] = model_df[["opp_scored_{}".format(x) for x in range(1, 5)]].mean(axis=1)
plot_regressors = (['selected', 'transfers_in', 'transfers_out', 'value', 'was_home', 'team_elo', 'opp_elo',
'transfers_balance', 'transfers_balance_rank', 'transfers_in_pct', 'transfers_out_pct', 'diff_elo',
'clean_sheet_ratio', 'opp_xG_avg', 'opp_G_avg'] +
['team_ppda_allowed_def_{}'.format(x) for x in range(1, 5)] +
['opp_ppda_allowed_def_{}'.format(x) for x in range(1, 5)])
for regressor in plot_regressors:
fig, ax = plt.subplots()
ax.scatter(model_df[regressor], model_df['total_points'], edgecolor='k', linewidths=0.5)
ax.set_xlabel(regressor)
ax.set_ylabel("Total points")
plt.show()
if (model_df[regressor] > 0).all():
fig, ax = plt.subplots()
ax.scatter(np.log(model_df[regressor].astype(float)), model_df['total_points'], edgecolor='k', linewidths=0.5)
ax.set_xlabel(("Log " + regressor))
ax.set_ylabel("Total points")
plt.show()
--------------------------------------------------------------------------- KeyError Traceback (most recent call last) ~\anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance) 3620 try: -> 3621 return self._engine.get_loc(casted_key) 3622 except KeyError as err: ~\anaconda3\lib\site-packages\pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc() ~\anaconda3\lib\site-packages\pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc() pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item() pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item() KeyError: 'diff_elo' The above exception was the direct cause of the following exception: KeyError Traceback (most recent call last) ~\AppData\Local\Temp\ipykernel_30348\615718920.py in <module> 1 for regressor in plot_regressors: 2 fig, ax = plt.subplots() ----> 3 ax.scatter(model_df[regressor], model_df['total_points'], edgecolor='k', linewidths=0.5) 4 ax.set_xlabel(regressor) 5 ax.set_ylabel("Total points") ~\anaconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key) 3503 if self.columns.nlevels > 1: 3504 return self._getitem_multilevel(key) -> 3505 indexer = self.columns.get_loc(key) 3506 if is_integer(indexer): 3507 indexer = [indexer] ~\anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance) 3621 return self._engine.get_loc(casted_key) 3622 except KeyError as err: -> 3623 raise KeyError(key) from err 3624 except TypeError: 3625 # If we have a listlike key, _check_indexing_error will raise KeyError: 'diff_elo'
# Add some transformed explanatory variables
log_variables = ['selected', 'transfers_in', 'transfers_out', 'transfers_in_pct', 'transfers_out_pct']
for var in log_variables:
log_var = "log_" + var
model_df[log_var] = np.log(model_df[var])
# Add some second-order variable interactions with "was_home" and "diff_elo"
# was_home_x_vars = ['selected', 'transfers_in', 'transfers_out', 'value', 'team_elo', 'opp_elo', 'transfers_balance',
# 'transfers_balance_rank', 'transfers_in_pct', 'transfers_out_pct', 'diff_elo', 'clean_sheet_ratio',
# 'opp_xG_avg', 'opp_G_avg']
# diff_elo_x_vars = ['selected', 'transfers_in', 'transfers_out', 'value', 'was_home', 'team_elo', 'opp_elo',
# 'transfers_balance', 'transfers_balance_rank', 'transfers_in_pct', 'transfers_out_pct',
# 'clean_sheet_ratio', 'opp_xG_avg', 'opp_G_avg']
# for var in was_home_x_vars:
# new_var_name = "was_home_x_" + var
# model_df[new_var_name] = model_df['was_home'] * model_df[var]
# for var in diff_elo_x_vars:
# new_var_name = "diff_elo_x_" + var
# model_df[new_var_name] = model_df['diff_elo'] * model_df[var]
# Sort the dataframe by time
model_df.sort_values("kickoff_date", ascending=True)
# Drop columns that will not be used in the models
drop_cols = ['element', 'fixture', 'kickoff_time', 'opponent_team', 'round',
'transfers_balance', 'name', 'season_x', 'opp_team_name',
'kickoff_datetime', 'kickoff_date', 'team_x', 'element_type',
'position', 'team_npxGD', 'team_result', 'team_pts', 'expected_goals_conceded',
'expected_assists', 'expected_goal_involvements', 'expected_goals']
keep_stats = ['total_points', 'selected', 'transfers_in', 'transfers_out', 'value',
'was_home']
drop_stats = list(set(lag_stats) - set(keep_stats))
model_df.drop((drop_cols + drop_stats), axis=1, inplace=True, errors='ignore')
# Set all remaining columns to be numeric
model_df = model_df.astype(float)
# Split into a test set and a training set
n_obs = model_df.shape[0]
model_df.index = range(len(model_df))
test_frac = 0.2
test_size = int(np.round(n_obs * test_frac))
test_idx = model_df.index[(len(model_df) - test_size):]
test = model_df.loc[test_idx]
train = model_df.drop(test_idx, axis=0)
# Split response and predictor
y_train = train.total_points
y_test = test.total_points
X_train = train.drop("total_points", axis=1)
X_test = test.drop("total_points", axis=1)
print(y_train.shape)
print(y_test.shape)
print(X_train.shape)
print(X_test.shape)
# Normalize the design matrices
X_mean = X_train.mean()
X_std = X_train.std()
X_train = ((X_train - X_mean)/X_std).fillna((X_train - X_mean))
X_test = ((X_test - X_mean)/X_std).fillna((X_test - X_mean))
# Drop any constant columns
const_cols = X_train.loc[:, (X_train == 0).all(0)].columns
X_train.drop(const_cols, axis=1, inplace=True)
X_test.drop(const_cols, axis=1, inplace=True)
# Add constant term
X_train = sm.add_constant(X_train, has_constant="add")
X_test = sm.add_constant(X_test, has_constant="add")
# Fit OLS regression model
model = sm.OLS(y_train, X_train)
res = model.fit()
print(res.summary())
resid = res.resid_pearson
# QQ plot for residuals
fig = sm.qqplot(resid)
a = np.linspace(-3, 3, 2)
plt.plot(a, a, color='black', linestyle='--')
plt.show()
# Plot of standardized residuals vs fitted values
fitted_values = res.fittedvalues
fig, ax = plt.subplots()
ax.scatter(fitted_values, resid, edgecolors='black', linewidths=0.5)
ax.set_xlabel("Fitted values")
ax.set_ylabel("Standardised residuals")
plt.show()
# Plot of fitted values vs response
fig, ax = plt.subplots()
ax.scatter(y_train, fitted_values, edgecolor='k', linewidths=0.5)
ax.set_xlabel("Response")
ax.set_ylabel("Fitted values")
ax.set_axisbelow(True)
plt.grid()
plt.show()
# Get predicted values for the test data
X_test = X_test[X_train.columns]
test_fitted_values = res.predict(X_test)
test_resid = test_fitted_values - y_test
# Plot of fitted values vs actual values
fig, ax = plt.subplots()
ax.scatter(y_test, test_fitted_values, edgecolor='k', linewidths=0.5)
ax.set_xlabel("Response")
ax.set_ylabel("Fitted values")
plt.show()
# Plot of residuals vs fitted values
fig, ax = plt.subplots()
ax.scatter(test_fitted_values, test_resid, edgecolor='k', linewidths=0.5)
ax.set_xlabel("Fitted values")
ax.set_ylabel("Raw residuals")
# Compute OOS MAE and MSE
mae = np.mean(abs(test_resid))
mse = np.mean(test_resid**2)
print("MAE: {:.3f}\nMSE: {:.3f}".format(mae, mse))
# Remove negative outliers from the response, shift and apply Box-Cox transform
negative_responses = y_train.loc[y_train < 0].index
y_train_tr = y_train.drop(negative_responses)
X_train_tr = X_train.drop(negative_responses, axis=0)
y_train_tr = y_train_tr + 1
y_train_tr, tr_lambda = stats.boxcox(y_train_tr)
# Fit OLS regression model
model_tr = sm.OLS(y_train_tr, X_train_tr)
res_tr = model_tr.fit()
print(res_tr.summary())
resid_tr = res_tr.resid_pearson
# QQ plot for residuals
fig = sm.qqplot(resid_tr)
a = np.linspace(-3, 3, 2)
plt.plot(a, a, color='black', linestyle='--')
plt.show()
# Plot of standardized residuals vs fitted values
fitted_values_tr = res_tr.fittedvalues
fig, ax = plt.subplots()
ax.scatter(fitted_values_tr, resid_tr, edgecolors='black', linewidths=0.5)
ax.set_xlabel("Fitted values")
ax.set_ylabel("Standardised residuals")
plt.show()
# Plot of fitted values vs response
fig, ax = plt.subplots()
ax.scatter(y_train_tr, fitted_values_tr, edgecolor='k', linewidths=0.5)
ax.set_xlabel("Response")
ax.set_ylabel("Fitted values")
ax.set_axisbelow(True)
plt.grid()
plt.show()
# Check for highly correlated columns in the design matrix
corr_mat = X_train.corr()
corr_threshold = 0.85
is_corr = (corr_mat >= corr_threshold)
for i in range(is_corr.shape[0]):
for j in range(i+1, is_corr.shape[1]):
if is_corr.iloc[i, j]:
print("[{}, {}]".format(is_corr.index[i], is_corr.columns[j]))
# Drop highly correlated columns
drop_cols = (["opp_npxGA_{}".format(x) for x in range(1, 5)] +
["opp_npxG_{}".format(x) for x in range(1, 5)] +
["team_npxG_{}".format(x) for x in range(1, 5)] +
["team_npxGA_{}".format(x) for x in range(1, 5)] +
["selected_{}".format(x) for x in range(1, 5)] +
["value_{}".format(x) for x in range(1, 5)] +
["influence_{}".format(x) for x in range(1, 5)] +
["ict_index_{}".format(x) for x in range(1, 5)] +
["creativity_{}".format(x) for x in range(1, 5)] +
["threat_{}".format(x) for x in range(1, 5)] +
["xGBuildup_{}".format(x) for x in range(1, 5)] +
["npg_{}".format(x) for x in range(1, 5)] +
["npxG_{}".format(x) for x in range(1, 5)] +
["team_missed_{}".format(x) for x in range(1, 5)] +
["bps_{}".format(x) for x in range(1, 5)] +
["shots_{}".format(x) for x in range(1, 5)] +
["assists_{}".format(x) for x in range(1, 5)] +
["red_cards_{}".format(x) for x in range(1, 5)] +
["starts_{}".format(x) for x in range(1, 5)]
)
X_train2 = X_train.drop(drop_cols, axis=1, errors='ignore')
X_test2 = X_test.drop(drop_cols, axis=1, errors='ignore')
# Check for highly correlated columns in the design matrix
corr_mat = X_train2.corr()
is_corr = (corr_mat >= corr_threshold)
for i in range(is_corr.shape[0]):
for j in range(i+1, is_corr.shape[1]):
if is_corr.iloc[i, j]:
print("[{}, {}] : {:.3f}".format(is_corr.index[i], is_corr.columns[j], corr_mat.iloc[i, j]))
# Fit new model
model2 = sm.OLS(y_train, X_train2)
res2 = model2.fit()
print(res2.summary())
resid_2 = res2.resid_pearson
# QQ plot for residuals
fig = sm.qqplot(resid_2)
a = np.linspace(-3, 3, 2)
plt.plot(a, a, color='black', linestyle='--')
plt.show()
# Plot of standardized residuals vs fitted values
fitted_values_2 = res2.fittedvalues
fig, ax = plt.subplots()
ax.scatter(fitted_values_2, resid_2, edgecolors='black', linewidths=0.5)
ax.set_xlabel("Fitted values")
ax.set_ylabel("Standardised residuals")
plt.show()
# Plot of fitted values vs response
fig, ax = plt.subplots()
ax.scatter(y_train, fitted_values_2, edgecolor='k', linewidths=0.5)
ax.set_xlabel("Response")
ax.set_ylabel("Fitted values")
ax.set_axisbelow(True)
plt.grid()
plt.show()
# Get predicted values for the test data
X_test2 = X_test[X_train2.columns]
test_fitted_values_2 = res2.predict(X_test2)
test_resid_2 = test_fitted_values_2 - y_test
# Plot of fitted values vs actual values
fig, ax = plt.subplots()
ax.scatter(y_test, test_fitted_values_2, edgecolor='k', linewidths=0.5)
ax.set_xlabel("Response")
ax.set_ylabel("Fitted values")
plt.show()
# Plot of residuals vs fitted values
fig, ax = plt.subplots()
ax.scatter(test_fitted_values_2, test_resid_2, edgecolor='k', linewidths=0.5)
ax.set_xlabel("Fitted values")
ax.set_ylabel("Raw residuals")
Text(0, 0.5, 'Raw residuals')
# Compute OOS MAE and MSE
mae = np.mean(abs(test_resid_2))
mse = np.mean(test_resid_2**2)
print("MAE: {:.3f}\nMSE: {:.3f}".format(mae, mse))
MAE: 2.786 MSE: 12.748
X_train.columns.tolist()
['const', 'selected', 'transfers_in', 'transfers_out', 'value', 'was_home', 'team_elo', 'opp_elo', 'assists_1', 'bonus_1', 'bps_1', 'clean_sheets_1', 'creativity_1', 'goals_conceded_1', 'ict_index_1', 'influence_1', 'minutes_1', 'own_goals_1', 'penalties_saved_1', 'red_cards_1', 'saves_1', 'selected_1', 'team_a_score_1', 'team_h_score_1', 'threat_1', 'total_points_1', 'transfers_in_1', 'transfers_out_1', 'value_1', 'was_home_1', 'yellow_cards_1', 'starts_1', 'shots_1', 'xG_1', 'xA_1', 'key_passes_1', 'npxG_1', 'xGChain_1', 'xGBuildup_1', 'team_xG_1', 'team_xGA_1', 'team_npxG_1', 'team_npxGA_1', 'team_deep_1', 'team_deep_allowed_1', 'team_scored_1', 'team_missed_1', 'team_xpts_1', 'team_wins_1', 'team_draws_1', 'team_loses_1', 'team_ppda_att_1', 'team_ppda_def_1', 'team_ppda_allowed_att_1', 'team_ppda_allowed_def_1', 'assists_2', 'bonus_2', 'bps_2', 'clean_sheets_2', 'creativity_2', 'goals_conceded_2', 'ict_index_2', 'influence_2', 'minutes_2', 'own_goals_2', 'penalties_saved_2', 'red_cards_2', 'saves_2', 'selected_2', 'team_a_score_2', 'team_h_score_2', 'threat_2', 'total_points_2', 'transfers_in_2', 'transfers_out_2', 'value_2', 'was_home_2', 'yellow_cards_2', 'starts_2', 'shots_2', 'xG_2', 'xA_2', 'key_passes_2', 'npxG_2', 'xGChain_2', 'xGBuildup_2', 'team_xG_2', 'team_xGA_2', 'team_npxG_2', 'team_npxGA_2', 'team_deep_2', 'team_deep_allowed_2', 'team_scored_2', 'team_missed_2', 'team_xpts_2', 'team_wins_2', 'team_draws_2', 'team_loses_2', 'team_ppda_att_2', 'team_ppda_def_2', 'team_ppda_allowed_att_2', 'team_ppda_allowed_def_2', 'assists_3', 'bonus_3', 'bps_3', 'clean_sheets_3', 'creativity_3', 'goals_conceded_3', 'ict_index_3', 'influence_3', 'minutes_3', 'own_goals_3', 'penalties_saved_3', 'red_cards_3', 'saves_3', 'selected_3', 'team_a_score_3', 'team_h_score_3', 'threat_3', 'total_points_3', 'transfers_in_3', 'transfers_out_3', 'value_3', 'was_home_3', 'yellow_cards_3', 'starts_3', 'shots_3', 'xG_3', 'xA_3', 'key_passes_3', 'npxG_3', 'xGChain_3', 'xGBuildup_3', 'team_xG_3', 'team_xGA_3', 'team_npxG_3', 'team_npxGA_3', 'team_deep_3', 'team_deep_allowed_3', 'team_scored_3', 'team_missed_3', 'team_xpts_3', 'team_wins_3', 'team_draws_3', 'team_loses_3', 'team_ppda_att_3', 'team_ppda_def_3', 'team_ppda_allowed_att_3', 'team_ppda_allowed_def_3', 'assists_4', 'bonus_4', 'bps_4', 'clean_sheets_4', 'creativity_4', 'goals_conceded_4', 'ict_index_4', 'influence_4', 'minutes_4', 'own_goals_4', 'penalties_saved_4', 'red_cards_4', 'saves_4', 'selected_4', 'team_a_score_4', 'team_h_score_4', 'threat_4', 'total_points_4', 'transfers_in_4', 'transfers_out_4', 'value_4', 'was_home_4', 'yellow_cards_4', 'starts_4', 'shots_4', 'xG_4', 'xA_4', 'key_passes_4', 'npxG_4', 'xGChain_4', 'xGBuildup_4', 'team_xG_4', 'team_xGA_4', 'team_npxG_4', 'team_npxGA_4', 'team_deep_4', 'team_deep_allowed_4', 'team_scored_4', 'team_missed_4', 'team_xpts_4', 'team_wins_4', 'team_draws_4', 'team_loses_4', 'team_ppda_att_4', 'team_ppda_def_4', 'team_ppda_allowed_att_4', 'team_ppda_allowed_def_4', 'opp_xG_1', 'opp_xGA_1', 'opp_npxG_1', 'opp_npxGA_1', 'opp_deep_1', 'opp_deep_allowed_1', 'opp_scored_1', 'opp_missed_1', 'opp_xpts_1', 'opp_wins_1', 'opp_draws_1', 'opp_loses_1', 'opp_ppda_att_1', 'opp_ppda_def_1', 'opp_ppda_allowed_att_1', 'opp_ppda_allowed_def_1', 'opp_xG_2', 'opp_xGA_2', 'opp_npxG_2', 'opp_npxGA_2', 'opp_deep_2', 'opp_deep_allowed_2', 'opp_scored_2', 'opp_missed_2', 'opp_xpts_2', 'opp_wins_2', 'opp_draws_2', 'opp_loses_2', 'opp_ppda_att_2', 'opp_ppda_def_2', 'opp_ppda_allowed_att_2', 'opp_ppda_allowed_def_2', 'opp_xG_3', 'opp_xGA_3', 'opp_npxG_3', 'opp_npxGA_3', 'opp_deep_3', 'opp_deep_allowed_3', 'opp_scored_3', 'opp_missed_3', 'opp_xpts_3', 'opp_wins_3', 'opp_draws_3', 'opp_loses_3', 'opp_ppda_att_3', 'opp_ppda_def_3', 'opp_ppda_allowed_att_3', 'opp_ppda_allowed_def_3', 'opp_xG_4', 'opp_xGA_4', 'opp_npxG_4', 'opp_npxGA_4', 'opp_deep_4', 'opp_deep_allowed_4', 'opp_scored_4', 'opp_missed_4', 'opp_xpts_4', 'opp_wins_4', 'opp_draws_4', 'opp_loses_4', 'opp_ppda_att_4', 'opp_ppda_def_4', 'opp_ppda_allowed_att_4', 'opp_ppda_allowed_def_4', 'transfers_in_pct', 'transfers_out_pct', 'transfers_balance_rank', 'diff_elo', 'clean_sheet_ratio', 'opp_xG_avg', 'opp_G_avg', 'log_selected', 'log_transfers_in', 'log_transfers_out', 'log_transfers_in_pct', 'log_transfers_out_pct']
# Perform stepwise backward selection, removing columns whose coefficients are not significantly different from zero
priority_list = ['log_selected', 'log_transfers_in', 'log_transfers_out', 'log_transfers_in_pct', 'log_transfers_out_pct', 'selected', 'transfers_in', 'transfers_out', 'transfers_in_pct', 'transfers_out_pct', 'transfers_balance_rank', 'value', 'was_home', 'team_elo', 'opp_elo', 'diff_elo', 'clean_sheet_ratio', 'opp_xG_avg', 'opp_G_avg', 'assists_1', 'bonus_1', 'clean_sheets_1', 'goals_scored_1', 'goals_conceded_1', 'minutes_1', 'own_goals_1', 'penalties_saved_1', 'red_cards_1', 'saves_1', 'team_a_score_1', 'team_h_score_1', 'total_points_1', 'transfers_in_1', 'transfers_out_1', 'was_home_1', 'yellow_cards_1', 'xG_1', 'xA_1', 'key_passes_1', 'xGChain_1', 'team_xG_1', 'team_xGA_1', 'team_deep_1', 'team_deep_allowed_1', 'team_scored_1', 'team_xpts_1', 'team_wins_1', 'team_draws_1', 'team_loses_1', 'team_ppda_att_1', 'team_ppda_def_1', 'team_ppda_allowed_att_1', 'team_ppda_allowed_def_1', 'opp_xG_1', 'opp_xGA_1', 'opp_deep_1', 'opp_deep_allowed_1', 'opp_scored_1', 'opp_missed_1', 'opp_xpts_1', 'opp_wins_1', 'opp_draws_1', 'opp_loses_1', 'opp_ppda_att_1', 'opp_ppda_def_1', 'opp_ppda_allowed_att_1', 'opp_ppda_allowed_def_1', 'assists_2', 'bonus_2', 'clean_sheets_2', 'goals_scored_2', 'goals_conceded_2', 'minutes_2', 'own_goals_2', 'penalties_saved_2', 'red_cards_2', 'saves_2', 'team_a_score_2', 'team_h_score_2', 'total_points_2', 'transfers_in_2', 'transfers_out_2', 'was_home_2', 'yellow_cards_2', 'xG_2', 'xA_2', 'key_passes_2', 'xGChain_2', 'team_xG_2', 'team_xGA_2', 'team_deep_2', 'team_deep_allowed_2', 'team_scored_2', 'team_xpts_2', 'team_wins_2', 'team_draws_2', 'team_loses_2', 'team_ppda_att_2', 'team_ppda_def_2', 'team_ppda_allowed_att_2', 'team_ppda_allowed_def_2', 'opp_xG_2', 'opp_xGA_2', 'opp_deep_2', 'opp_deep_allowed_2', 'opp_scored_2', 'opp_missed_2', 'opp_xpts_2', 'opp_wins_2', 'opp_draws_2', 'opp_loses_2', 'opp_ppda_att_2', 'opp_ppda_def_2', 'opp_ppda_allowed_att_2', 'opp_ppda_allowed_def_2', 'assists_3', 'bonus_3', 'clean_sheets_3', 'goals_scored_3', 'goals_conceded_3', 'minutes_3', 'own_goals_3', 'penalties_saved_3', 'red_cards_3', 'saves_3', 'team_a_score_3', 'team_h_score_3', 'total_points_3', 'transfers_in_3', 'transfers_out_3', 'was_home_3', 'yellow_cards_3', 'xG_3', 'xA_3', 'key_passes_3', 'xGChain_3', 'team_xG_3', 'team_xGA_3', 'team_deep_3', 'team_deep_allowed_3', 'team_scored_3', 'team_xpts_3', 'team_wins_3', 'team_draws_3', 'team_loses_3', 'team_ppda_att_3', 'team_ppda_def_3', 'team_ppda_allowed_att_3', 'team_ppda_allowed_def_3', 'opp_xG_3', 'opp_xGA_3', 'opp_deep_3', 'opp_deep_allowed_3', 'opp_scored_3', 'opp_missed_3', 'opp_xpts_3', 'opp_wins_3', 'opp_draws_3', 'opp_loses_3', 'opp_ppda_att_3', 'opp_ppda_def_3', 'opp_ppda_allowed_att_3', 'opp_ppda_allowed_def_3', 'assists_4', 'bonus_4', 'clean_sheets_4', 'goals_scored_4', 'goals_conceded_4', 'minutes_4', 'own_goals_4', 'penalties_saved_4', 'red_cards_4', 'saves_4', 'team_a_score_4', 'team_h_score_4', 'total_points_4', 'transfers_in_4', 'transfers_out_4', 'was_home_4', 'yellow_cards_4', 'starts_4', 'xG_4', 'xA_4', 'key_passes_4', 'xGChain_4', 'team_xG_4', 'team_xGA_4', 'team_deep_4', 'team_deep_allowed_4', 'team_scored_4', 'team_xpts_4', 'team_wins_4', 'team_draws_4', 'team_loses_4', 'team_ppda_att_4', 'team_ppda_def_4', 'team_ppda_allowed_att_4', 'team_ppda_allowed_def_4', 'opp_xG_4', 'opp_xGA_4', 'opp_deep_4', 'opp_deep_allowed_4', 'opp_scored_4', 'opp_missed_4', 'opp_xpts_4', 'opp_wins_4', 'opp_draws_4', 'opp_loses_4', 'opp_ppda_att_4', 'opp_ppda_def_4', 'opp_ppda_allowed_att_4', 'opp_ppda_allowed_def_4']
priority_list.reverse()
dropped_cols = []
X_train_temp = X_train2.copy()
selection_complete = False
p_value_threshold = 0.1
while not selection_complete:
model_temp = sm.OLS(y_train, X_train_temp)
res_temp = model_temp.fit()
drop_candidates = res_temp.pvalues.loc[res_temp.pvalues > p_value_threshold]
if len(drop_candidates) == 0:
selection_complete = True
continue
drop_col = priority_list.pop([col in drop_candidates for col in priority_list].index(True))
print("{}: {:.3f}".format(drop_col, drop_candidates.loc[drop_col]))
dropped_cols.append(drop_col)
X_train_temp.drop(drop_col, axis=1, inplace=True)
opp_ppda_allowed_def_4: 0.836 opp_ppda_def_4: 0.716 opp_loses_4: 0.684 opp_draws_4: 0.464 opp_wins_4: 0.728 opp_xpts_4: 0.341 opp_ppda_att_4: 0.103 opp_ppda_allowed_att_4: 0.102 opp_missed_4: 0.470 opp_deep_allowed_4: 0.353 opp_deep_4: 0.149 opp_xGA_4: 0.733 opp_xG_4: 0.424 team_ppda_allowed_def_4: 0.649 team_ppda_allowed_att_4: 0.507 team_ppda_def_4: 0.679 team_ppda_att_4: 0.367 team_loses_4: 0.720 team_draws_4: 0.846 team_wins_4: 0.703 team_xpts_4: 0.136 team_deep_allowed_4: 0.573 team_xGA_4: 0.953 xGChain_4: 0.840 key_passes_4: 0.388 xA_4: 0.443 xG_4: 0.997 yellow_cards_4: 0.507 was_home_4: 0.566 transfers_in_4: 0.803 total_points_4: 0.613 saves_4: 0.818 own_goals_4: 0.449 minutes_4: 0.285 goals_conceded_4: 0.171 team_h_score_4: 0.256 team_scored_4: 0.233 team_xG_4: 0.131 team_deep_4: 0.104 team_a_score_4: 0.623 clean_sheets_4: 0.548 opp_ppda_allowed_def_3: 0.230 opp_ppda_allowed_att_3: 0.852 opp_ppda_def_3: 0.566 opp_ppda_att_3: 0.993 opp_loses_3: 0.414 opp_wins_3: 0.933 opp_xpts_3: 0.277 opp_missed_3: 0.435 opp_scored_3: 0.116 opp_deep_allowed_3: 0.517 opp_deep_3: 0.229 opp_xGA_3: 0.394 opp_xG_3: 0.223 team_ppda_allowed_att_3: 0.241 team_ppda_def_3: 0.825 team_ppda_att_3: 0.530 team_loses_3: 0.118 team_draws_3: 0.212 team_wins_3: 0.335 team_xpts_3: 0.427 team_scored_3: 0.430 team_deep_allowed_3: 0.277 team_deep_3: 0.442 team_xGA_3: 0.431 team_xG_3: 0.291 xGChain_3: 0.735 xA_3: 0.175 key_passes_3: 0.145 xG_3: 0.960 yellow_cards_3: 0.559 was_home_3: 0.468 transfers_out_3: 0.756 transfers_in_3: 0.798 total_points_3: 0.923 team_h_score_3: 0.968 team_a_score_3: 0.505 penalties_saved_3: 0.637 own_goals_3: 0.896 minutes_3: 0.175 goals_conceded_3: 0.946 bonus_3: 0.212 saves_3: 0.167 clean_sheets_3: 0.180 opp_ppda_allowed_def_2: 0.570 opp_ppda_allowed_att_2: 0.550 opp_ppda_def_2: 0.701 opp_ppda_att_2: 0.949 opp_draws_2: 0.630 opp_wins_2: 0.280 opp_xpts_2: 0.724 opp_scored_2: 0.999 opp_deep_allowed_2: 0.411 opp_deep_2: 0.497 opp_xGA_2: 0.233 opp_xG_2: 0.938 team_ppda_allowed_def_2: 0.516 team_ppda_allowed_att_2: 0.469 team_ppda_def_2: 0.235 penalties_saved_4: 0.101 team_loses_2: 0.375 team_draws_2: 0.457 team_wins_2: 0.627 team_xpts_2: 0.341 team_deep_allowed_2: 0.899 team_deep_2: 0.709 team_xGA_2: 0.449 team_xG_2: 0.519 xGChain_2: 0.379 key_passes_2: 0.510 xA_2: 0.274 xG_2: 0.446 transfers_out_2: 0.455 transfers_in_2: 0.719 total_points_2: 0.557 saves_2: 0.384 penalties_saved_2: 0.722 own_goals_2: 0.881 minutes_2: 0.582 clean_sheets_2: 0.550 bonus_2: 0.115 opp_ppda_allowed_def_1: 0.753 opp_ppda_att_1: 0.673 opp_loses_1: 0.279 opp_draws_1: 0.592 opp_wins_1: 0.239 opp_xpts_1: 0.427 opp_missed_1: 0.143 opp_deep_1: 0.144 opp_deep_allowed_1: 0.148 opp_xGA_1: 0.843 opp_xG_1: 0.850 team_ppda_def_1: 0.547 opp_ppda_allowed_att_1: 0.104 team_ppda_att_1: 0.729 team_loses_1: 0.486 team_draws_1: 0.304 team_wins_1: 0.684 team_xpts_1: 0.282 team_scored_1: 0.469 team_deep_allowed_1: 0.380 team_deep_1: 0.510 team_xG_1: 0.916 xGChain_1: 0.269 key_passes_1: 0.607 xA_1: 0.522 xG_1: 0.684 was_home_1: 0.608 transfers_out_1: 0.637 transfers_in_1: 0.162 team_h_score_1: 0.565 team_a_score_1: 0.293 opp_xG_avg: 0.947 clean_sheet_ratio: 0.582 opp_elo: 0.122 team_elo: 0.934 value: 0.288 transfers_balance_rank: 0.920 transfers_out_pct: 0.250 transfers_in_pct: 0.964 transfers_out: 0.193 transfers_in: 0.360 selected: 0.238 log_transfers_out_pct: 0.349 log_transfers_in_pct: 0.293 log_transfers_out: 0.613 log_transfers_in: 0.616 log_selected: 0.357 transfers_out_4: 0.116
# Examine model chosen by backward selection
X_train_bs = sm.add_constant(X_train_temp)
model_bs = sm.OLS(y_train, X_train_bs)
res_bs = model_bs.fit()
print(res_bs.summary())
OLS Regression Results
==============================================================================
Dep. Variable: total_points R-squared: 0.162
Model: OLS Adj. R-squared: 0.113
Method: Least Squares F-statistic: 3.280
Date: Fri, 29 Mar 2024 Prob (F-statistic): 2.83e-08
Time: 09:43:31 Log-Likelihood: -1311.1
No. Observations: 540 AIC: 2684.
Df Residuals: 509 BIC: 2817.
Df Model: 30
Covariance Type: nonrobust
===========================================================================================
coef std err t P>|t| [0.025 0.975]
-------------------------------------------------------------------------------------------
const 3.7148 0.122 30.556 0.000 3.476 3.954
was_home 0.2521 0.129 1.960 0.051 -0.001 0.505
bonus_1 -1.0936 0.327 -3.342 0.001 -1.736 -0.451
clean_sheets_1 -1.2789 0.505 -2.533 0.012 -2.271 -0.287
goals_conceded_1 1.1976 0.302 3.971 0.000 0.605 1.790
minutes_1 -0.7671 0.226 -3.397 0.001 -1.211 -0.323
own_goals_1 0.2406 0.138 1.745 0.082 -0.030 0.512
penalties_saved_1 -0.5130 0.241 -2.129 0.034 -0.987 -0.040
saves_1 -0.5200 0.269 -1.932 0.054 -1.049 0.009
total_points_1 2.9810 0.937 3.183 0.002 1.141 4.821
yellow_cards_1 0.3076 0.150 2.046 0.041 0.012 0.603
team_xGA_1 -0.4877 0.185 -2.641 0.009 -0.851 -0.125
team_ppda_allowed_att_1 -0.3643 0.147 -2.483 0.013 -0.652 -0.076
team_ppda_allowed_def_1 0.3313 0.129 2.566 0.011 0.078 0.585
goals_conceded_2 0.7005 0.293 2.388 0.017 0.124 1.277
team_a_score_2 -0.7681 0.293 -2.618 0.009 -1.344 -0.192
team_h_score_2 -0.7037 0.317 -2.223 0.027 -1.326 -0.082
was_home_2 -0.2800 0.133 -2.109 0.035 -0.541 -0.019
yellow_cards_2 -0.3032 0.126 -2.410 0.016 -0.550 -0.056
team_scored_2 0.9419 0.318 2.960 0.003 0.317 1.567
team_ppda_att_2 0.2705 0.130 2.080 0.038 0.015 0.526
team_ppda_allowed_def_3 -0.4010 0.128 -3.145 0.002 -0.651 -0.150
bonus_4 0.2360 0.127 1.857 0.064 -0.014 0.486
opp_scored_1 -0.3240 0.167 -1.946 0.052 -0.651 0.003
opp_ppda_def_1 0.2814 0.126 2.238 0.026 0.034 0.528
opp_missed_2 -0.4349 0.168 -2.590 0.010 -0.765 -0.105
opp_loses_2 0.3442 0.179 1.918 0.056 -0.008 0.697
opp_draws_3 0.4167 0.126 3.307 0.001 0.169 0.664
opp_scored_4 -0.4067 0.164 -2.483 0.013 -0.729 -0.085
diff_elo 0.5755 0.159 3.631 0.000 0.264 0.887
opp_G_avg 0.6061 0.223 2.721 0.007 0.168 1.044
==============================================================================
Omnibus: 51.959 Durbin-Watson: 1.950
Prob(Omnibus): 0.000 Jarque-Bera (JB): 64.670
Skew: 0.836 Prob(JB): 9.06e-15
Kurtosis: 3.286 Cond. No. 17.0
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
resid_bs = res_bs.resid_pearson
# QQ plot for residuals
fig = sm.qqplot(resid_bs)
a = np.linspace(-3, 3, 2)
plt.plot(a, a, color='black', linestyle='--')
plt.show()
# Plot of standardized residuals vs fitted values
fitted_values_bs = res_bs.fittedvalues
fig, ax = plt.subplots()
ax.scatter(fitted_values_bs, resid_bs, edgecolors='black', linewidths=0.5)
ax.set_xlabel("Fitted values")
ax.set_ylabel("Standardised residuals")
plt.show()
# Plot of fitted values vs response
fig, ax = plt.subplots()
ax.scatter(y_train, fitted_values_bs, edgecolor='k', linewidths=0.5)
ax.set_xlabel("Response")
ax.set_ylabel("Fitted values")
ax.set_axisbelow(True)
plt.grid()
plt.show()
# Create residual vs regressor plots
for col in X_train_bs.columns[1:]:
fig, ax = plt.subplots()
ax.set_title(col)
ax.scatter(X_train_bs[col], res_bs.resid, edgecolor='k', linewidths=0.5)
C:\Users\jonat\AppData\Local\Temp\ipykernel_30348\3216224720.py:3: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`). Consider using `matplotlib.pyplot.close()`. fig, ax = plt.subplots()
# Check for leverage points and outliers
res_bs.get_influence().plot_index()
res_bs.get_influence().plot_influence()
corr_matrix_bs = X_train_bs.corr()
smg.plot_corr(corr_matrix_bs)
plt.show()
# Try the backward selection model on the test data
X_test_bs = X_test.loc[:, X_train_temp.columns]
X_test_bs['const'] = 1.0
X_test_bs = X_test_bs[X_train_bs.columns]
test_bs_fitted_values = res_bs.predict(X_test_bs)
test_bs_resid = test_bs_fitted_values - y_test
# Plot of fitted values vs actual values
fig, ax = plt.subplots()
ax.scatter(y_test, test_bs_fitted_values, edgecolor='k', linewidths=0.5)
ax.set_xlabel("Response")
ax.set_ylabel("Fitted values")
plt.show()
# Plot of residuals vs fitted values
fig, ax = plt.subplots()
ax.scatter(test_bs_fitted_values, test_bs_resid, edgecolor='k', linewidths=0.5)
ax.set_xlabel("Fitted values")
ax.set_ylabel("Raw residuals")
Text(0, 0.5, 'Raw residuals')
# Compute OOS MAE and MSE
mae_bs = np.mean(abs(test_bs_resid))
mse_bs = np.mean(test_bs_resid**2)
print("MAE: {:.3f}\nMSE: {:.3f}".format(mae_bs, mse_bs))
MAE: 2.393 MSE: 8.880
# Run a lasso regression implementing 20-fold cross-validation for the selection of the tuning parameter
def split(a, n):
k = len(a) // n
q = len(a) % n
return [a[(i*k + min(i, q)):(i+1)*k + 1 + min(i, q-1)] for i in range(n)]
validation_idxs = split(X_train2.sample(frac=1).index, 20)
alpha_list = [1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1, 10]
lasso_cv_error_dict = {}
for a in alpha_list:
print(a)
lasso_cv_error_dict[a] = []
for i, idx in enumerate(validation_idxs):
X_test_cv = X_train2.loc[idx]
y_test_cv = y_train[idx]
X_train_cv = X_train2.drop(idx, axis=0)
y_train_cv = y_train.drop(idx)
model_lasso = sm.OLS(y_train_cv, X_train_cv)
res_lasso = model_lasso.fit_regularized(alpha=a, L1_wt=1)
cv_fitted_vals = res_lasso.predict(X_test_cv)
lasso_cv_error_dict[a] += (cv_fitted_vals - y_test_cv).tolist()
1e-06 1e-05 0.0001 0.001 0.01 0.1 1 10
lasso_cv_mae = []
lasso_cv_mse = []
for a in alpha_list:
lasso_cv_mae.append(np.mean(abs(np.array(lasso_cv_error_dict[a]))))
lasso_cv_mse.append(np.mean(np.array(lasso_cv_error_dict[a])**2))
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))
ax1.plot(alpha_list, lasso_cv_mae)
ax1.set_xscale("log")
ax1.set_xlabel("Alpha")
ax1.set_ylabel("Mean absolute error")
ax2.plot(alpha_list, lasso_cv_mse)
ax2.set_xscale("log")
ax2.set_xlabel("Alpha")
ax2.set_ylabel("Mean square error")
plt.show()
alpha_lasso = 0.1
model_lasso = sm.OLS(y_train, X_train2)
res_lasso = model_lasso.fit_regularized(alpha=alpha_lasso, L1_wt=1)
lasso_test_fitted_values = res_lasso.predict(X_test2)
lasso_test_resid = lasso_test_fitted_values - y_test
# Plot of fitted values vs actual values
fig, ax = plt.subplots()
ax.scatter(y_test, lasso_test_fitted_values, edgecolor='k', linewidths=0.5)
ax.set_xlabel("Response")
ax.set_ylabel("Fitted values")
plt.show()
# Plot of residuals vs fitted values
fig, ax = plt.subplots()
ax.scatter(lasso_test_fitted_values, lasso_test_resid, edgecolor='k', linewidths=0.5)
ax.set_xlabel("Fitted values")
ax.set_ylabel("Raw residuals")
Text(0, 0.5, 'Raw residuals')
# Compute OOS MAE and MSE
mae_lasso = np.mean(abs(lasso_test_resid))
mse_lasso = np.mean(lasso_test_resid**2)
print("MAE: {:.3f}\nMSE: {:.3f}".format(mae_lasso, mse_lasso))
MAE: 2.332 MSE: 8.138
alpha_list = [1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1]
ridge_cv_error_dict = {}
for a in alpha_list:
print(a)
ridge_cv_error_dict[a] = []
for i, idx in enumerate(validation_idxs):
X_test_cv = X_train2.loc[idx]
y_test_cv = y_train[idx]
X_train_cv = X_train2.drop(idx, axis=0)
y_train_cv = y_train.drop(idx)
model_ridge = sm.OLS(y_train_cv, X_train_cv)
res_ridge = model_ridge.fit_regularized(alpha=a, L1_wt=0)
cv_fitted_vals = res_ridge.predict(X_test_cv)
ridge_cv_error_dict[a] += (cv_fitted_vals - y_test_cv).tolist()
1e-06 1e-05 0.0001 0.001 0.01 0.1 1
ridge_cv_mae = []
ridge_cv_mse = []
for a in alpha_list:
ridge_cv_mae.append(np.mean(abs(np.array(ridge_cv_error_dict[a]))))
ridge_cv_mse.append(np.mean(np.array(ridge_cv_error_dict[a])**2))
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))
ax1.plot(alpha_list, ridge_cv_mae)
ax1.set_xscale("log")
ax1.set_xlabel("Alpha")
ax1.set_ylabel("Mean absolute error")
ax2.plot(alpha_list, ridge_cv_mse)
ax2.set_xscale("log")
ax2.set_xlabel("Alpha")
ax2.set_ylabel("Mean square error")
plt.show()
alpha_ridge = 0.1
model_ridge = sm.OLS(y_train, X_train2)
res_ridge = model_ridge.fit_regularized(alpha=alpha_ridge, L1_wt=1)
ridge_test_fitted_values = res_ridge.predict(X_test2)
ridge_test_resid = ridge_test_fitted_values - y_test
# Plot of fitted values vs actual values
fig, ax = plt.subplots()
ax.scatter(y_test, ridge_test_fitted_values, edgecolor='k', linewidths=0.5)
ax.set_xlabel("Response")
ax.set_ylabel("Fitted values")
plt.show()
# Plot of residuals vs fitted values
fig, ax = plt.subplots()
ax.scatter(ridge_test_fitted_values, ridge_test_resid, edgecolor='k', linewidths=0.5)
ax.set_xlabel("Fitted values")
ax.set_ylabel("Raw residuals")
Text(0, 0.5, 'Raw residuals')
# Compute OOS MAE and MSE
mae_ridge = np.mean(abs(ridge_test_resid))
mse_ridge = np.mean(ridge_test_resid**2)
print("MAE: {:.3f}\nMSE: {:.3f}".format(mae_ridge, mse_ridge))
MAE: 2.099 MSE: 6.466
# Implement a random forest regression
# Begin by performing cross-validation to test on the appropriate depth of the tree and the number of iterators to use
max_depth = [2, 3, 4, 5, 6, 10, 20]
n_estimators = [10, 100, 1000]
mae_rf_cv = {}
mse_rf_cv = {}
validation_sets = split(X_train2.sample(frac=1).index, 10)
for n in n_estimators:
mae_rf_cv[n] = {'train': [], 'test': []}
mse_rf_cv[n] = {'train': [], 'test': []}
for depth in max_depth:
print("depth: {}, no. of estimators: {}".format(depth, n))
cv_train_mae = []
cv_train_mse = []
cv_val_mae = []
cv_val_mse = []
for validation_idxs in validation_sets:
X_train_rf_cv = X_train2.drop(validation_idxs, axis=0)
y_train_rf_cv = y_train.drop(validation_idxs)
X_validation_rf_cv = X_train2.loc[validation_idxs]
y_validation_rf_cv = y_train.loc[validation_idxs]
regr = RandomForestRegressor(max_depth=depth, random_state=0, oob_score=True, n_estimators=n)
regr.fit(X_train_rf_cv, y_train_rf_cv)
predict_train = regr.predict(X_train_rf_cv)
predict_val = regr.predict(X_validation_rf_cv)
resid_train = predict_train - y_train_rf_cv
resid_test = predict_val - y_validation_rf_cv
cv_train_mae.append(np.mean(abs(resid_train)))
cv_train_mse.append(np.mean(resid_train**2))
cv_val_mae.append(np.mean(abs(resid_test)))
cv_val_mse.append(np.mean(resid_test**2))
mae_rf_cv[n]['train'].append(np.mean(cv_train_mae))
mse_rf_cv[n]['train'].append(np.mean(cv_train_mse))
mae_rf_cv[n]['test'].append(np.mean(cv_val_mae))
mse_rf_cv[n]['test'].append(np.mean(cv_val_mse))
depth: 2, no. of estimators: 10
C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn(
depth: 3, no. of estimators: 10
C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn(
depth: 4, no. of estimators: 10
C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn(
depth: 5, no. of estimators: 10
C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn(
depth: 6, no. of estimators: 10
C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn(
depth: 10, no. of estimators: 10
C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn(
depth: 20, no. of estimators: 10
C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn( C:\Users\jonat\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py:560: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable OOB estimates. warn(
depth: 2, no. of estimators: 100 depth: 3, no. of estimators: 100 depth: 4, no. of estimators: 100 depth: 5, no. of estimators: 100 depth: 6, no. of estimators: 100 depth: 10, no. of estimators: 100 depth: 20, no. of estimators: 100 depth: 2, no. of estimators: 1000 depth: 3, no. of estimators: 1000 depth: 4, no. of estimators: 1000 depth: 5, no. of estimators: 1000 depth: 6, no. of estimators: 1000 depth: 10, no. of estimators: 1000 depth: 20, no. of estimators: 1000
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(14, 14))
for n in n_estimators:
ax1.plot(max_depth, mae_rf_cv[n]['train'], label="{}".format(n))
ax2.plot(max_depth, mae_rf_cv[n]['test'], label="{}".format(n))
ax3.plot(max_depth, mse_rf_cv[n]['train'], label="{}".format(n))
ax4.plot(max_depth, mse_rf_cv[n]['test'], label="{}".format(n))
ax1.set_xscale('log')
ax1.xaxis.set_minor_formatter(FormatStrFormatter("%.0f"))
ax1.tick_params(which='major', labelbottom=False)
ax1.set_xlabel('Maximum tree depth')
ax1.set_ylabel('MAE on training set')
legend1 = ax1.legend(loc='upper right')
ax2.set_xscale('log')
ax2.xaxis.set_minor_formatter(FormatStrFormatter("%.0f"))
ax2.tick_params(which='major', labelbottom=False)
ax2.set_xlabel('Maximum tree depth')
ax2.set_ylabel('MAE on validation sets')
legend2 = ax2.legend(loc='upper right')
ax3.set_xscale('log')
ax3.xaxis.set_minor_formatter(FormatStrFormatter("%.0f"))
ax3.tick_params(which='major', labelbottom=False)
ax3.set_xlabel('Maximum tree depth')
ax3.set_ylabel('MSE on training set')
legend3 = ax3.legend(loc='upper right')
ax4.set_xscale('log')
ax4.xaxis.set_minor_formatter(FormatStrFormatter("%.0f"))
ax4.tick_params(which='major', labelbottom=False)
ax4.set_xlabel('Maximum tree depth')
ax4.set_ylabel('MSE on validation sets')
legend4 = ax4.legend(loc='upper right')
plt.show()
# Fit the final random forest regression model
max_depth_final = 10
n_estimators_final = 1000
rf_regr1 = RandomForestRegressor(max_depth=max_depth_final, random_state=0, oob_score=True, n_estimators=n_estimators_final)
rf_regr1.fit(X_train, y_train)
predict_train = rf_regr1.predict(X_train)
predict_test = rf_regr1.predict(X_test)
resid_train = predict_train - y_train
resid_test = predict_test - y_test
mae_rf_train = np.mean(abs(resid_train))
mae_rf_test = np.mean(abs(resid_test))
mse_rf_train = np.mean(resid_train**2)
mse_rf_test = np.mean(resid_test**2)
print("MAE on test set = {}\nMSE on test set = {}".format(mae_rf_test, mse_rf_test))
MAE on test set = 1.3688073266040326 MSE on test set = 4.199005651780417
# Examine feature importance
importances = pd.Series(rf_regr1.feature_importances_, index=X_train.columns).sort_values()
fig, ax = plt.subplots(figsize=(8, 60))
ax.margins(y = 0.0025)
ax.barh(importances.index, importances)
plt.show()
importance_threshold = 0.0025
rf_vars_initial = importances[importances >= importance_threshold].index.tolist()
n_vars = len(rf_vars_initial)
print("No. of variables = {}".format(len(importances)))
print("No. of variables above importance threshold = {}".format(len(rf_vars_initial)))
No. of variables = 258 No. of variables above importance threshold = 81
# We will only perform the variable selection CV using variables that are above the importance threshold in the initial model
# since running this for all variables would be time-consuming although theoretically preferred.
# Set up the variable selection cross-validation
Z_train = X_train[rf_vars_initial]
Z_test = X_test[rf_vars_initial]
rf_vars_current = rf_vars_initial.copy()
validation_sets = split(X_train.index, 10)
cv_info = {}
max_depth = 10
n_estimators = 100
while len(rf_vars_current) > 0:
print(len(rf_vars_current))
cv_info[len(rf_vars_current)] = {}
cv_info[len(rf_vars_current)]['vars'] = rf_vars_current
Z_train = Z_train[rf_vars_current]
rf_regr_vs = RandomForestRegressor(max_depth=max_depth,
random_state=0,
oob_score=True,
n_estimators=n_estimators)
rf_regr_vs.fit(Z_train, y_train)
# Get MAE and MSE estimates using cross-validation
mae = []
mse = []
for validation_idxs in validation_sets:
Z_train1 = Z_train.drop(validation_idxs, axis=0)
y_train1 = y_train.drop(validation_idxs)
Z_val = Z_train.loc[validation_idxs]
y_val = y_train.loc[validation_idxs]
rf_regr = RandomForestRegressor(max_depth=max_depth,
random_state=0,
oob_score=True,
n_estimators=n_estimators)
rf_regr.fit(Z_train1, y_train1)
predict_val = rf_regr.predict(Z_val)
resid_val = predict_val - y_val
mae.append(np.mean(abs(resid_val)))
mse.append(np.mean(resid_val**2))
cv_info[len(rf_vars_current)]['mae'] = np.mean(mae)
cv_info[len(rf_vars_current)]['mse'] = np.mean(mse)
var_importances = pd.Series(rf_regr_vs.feature_importances_, index=rf_vars_current).sort_values()
drop_var = var_importances.index[0]
rf_vars_current.remove(var_importances.index[0])
['team_xGA_4', 'team_npxGA_1', 'team_xG_2', 'opp_deep_2', 'team_npxG_1', 'opp_scored_4', 'opp_ppda_allowed_att_3', 'team_ppda_allowed_att_2', 'opp_npxGA_1', 'team_ppda_def_4', 'transfers_out_3', 'ict_index_1', 'team_ppda_allowed_att_1', 'opp_deep_allowed_4', 'opp_npxGA_4', 'team_deep_1', 'team_missed_1', 'team_ppda_allowed_att_4', 'opp_ppda_allowed_def_2', 'xGChain_2', 'transfers_out', 'influence_4', 'transfers_in_4', 'value', 'team_deep_allowed_1', 'bps_3', 'opp_ppda_allowed_def_3', 'opp_ppda_att_3', 'opp_npxG_4', 'opp_xG_1', 'bonus_2', 'opp_ppda_def_2', 'influence_1', 'opp_ppda_def_1', 'team_npxG_4', 'team_ppda_att_4', 'opp_ppda_att_1', 'opp_ppda_def_4', 'opp_npxGA_3', 'team_ppda_def_1', 'opp_xG_3', 'opp_npxG_3', 'bonus_1', 'opp_ppda_att_4', 'opp_xpts_3', 'opp_xG_4', 'opp_xpts_4', 'influence_2', 'transfers_in', 'opp_npxG_2', 'total_points_3', 'ict_index_2', 'opp_ppda_allowed_def_1', 'opp_ppda_def_3', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'team_ppda_allowed_def_1', 'opp_xpts_1', 'opp_xG_2', 'opp_deep_1', 'opp_xpts_2', 'was_home', 'team_npxG_2', 'team_ppda_att_2', 'team_ppda_allowed_def_2', 'team_ppda_def_2', 'opp_ppda_allowed_att_1', 'team_elo', 'opp_npxG_1', 'team_deep_allowed_2', 'opp_ppda_att_2', 'opp_scored_3', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['team_xGA_4', 'team_npxGA_1', 'team_xG_2', 'opp_deep_2', 'team_npxG_1', 'opp_ppda_allowed_att_3', 'team_ppda_allowed_att_2', 'opp_npxGA_1', 'team_ppda_def_4', 'transfers_out_3', 'ict_index_1', 'team_ppda_allowed_att_1', 'opp_deep_allowed_4', 'opp_npxGA_4', 'team_deep_1', 'team_missed_1', 'team_ppda_allowed_att_4', 'opp_ppda_allowed_def_2', 'xGChain_2', 'transfers_out', 'influence_4', 'transfers_in_4', 'value', 'team_deep_allowed_1', 'bps_3', 'opp_ppda_allowed_def_3', 'opp_ppda_att_3', 'opp_npxG_4', 'opp_xG_1', 'bonus_2', 'opp_ppda_def_2', 'influence_1', 'opp_ppda_def_1', 'team_npxG_4', 'team_ppda_att_4', 'opp_ppda_att_1', 'opp_ppda_def_4', 'opp_npxGA_3', 'team_ppda_def_1', 'opp_xG_3', 'opp_npxG_3', 'bonus_1', 'opp_ppda_att_4', 'opp_xpts_3', 'opp_xG_4', 'opp_xpts_4', 'influence_2', 'transfers_in', 'opp_npxG_2', 'total_points_3', 'ict_index_2', 'opp_ppda_allowed_def_1', 'opp_ppda_def_3', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'team_ppda_allowed_def_1', 'opp_xpts_1', 'opp_xG_2', 'opp_deep_1', 'opp_xpts_2', 'was_home', 'team_npxG_2', 'team_ppda_att_2', 'team_ppda_allowed_def_2', 'team_ppda_def_2', 'opp_ppda_allowed_att_1', 'team_elo', 'opp_npxG_1', 'team_deep_allowed_2', 'opp_ppda_att_2', 'opp_scored_3', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['team_xGA_4', 'team_npxGA_1', 'team_xG_2', 'opp_deep_2', 'team_npxG_1', 'opp_ppda_allowed_att_3', 'team_ppda_allowed_att_2', 'opp_npxGA_1', 'team_ppda_def_4', 'transfers_out_3', 'ict_index_1', 'team_ppda_allowed_att_1', 'opp_deep_allowed_4', 'opp_npxGA_4', 'team_deep_1', 'team_missed_1', 'team_ppda_allowed_att_4', 'opp_ppda_allowed_def_2', 'xGChain_2', 'transfers_out', 'influence_4', 'value', 'team_deep_allowed_1', 'bps_3', 'opp_ppda_allowed_def_3', 'opp_ppda_att_3', 'opp_npxG_4', 'opp_xG_1', 'bonus_2', 'opp_ppda_def_2', 'influence_1', 'opp_ppda_def_1', 'team_npxG_4', 'team_ppda_att_4', 'opp_ppda_att_1', 'opp_ppda_def_4', 'opp_npxGA_3', 'team_ppda_def_1', 'opp_xG_3', 'opp_npxG_3', 'bonus_1', 'opp_ppda_att_4', 'opp_xpts_3', 'opp_xG_4', 'opp_xpts_4', 'influence_2', 'transfers_in', 'opp_npxG_2', 'total_points_3', 'ict_index_2', 'opp_ppda_allowed_def_1', 'opp_ppda_def_3', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'team_ppda_allowed_def_1', 'opp_xpts_1', 'opp_xG_2', 'opp_deep_1', 'opp_xpts_2', 'was_home', 'team_npxG_2', 'team_ppda_att_2', 'team_ppda_allowed_def_2', 'team_ppda_def_2', 'opp_ppda_allowed_att_1', 'team_elo', 'opp_npxG_1', 'team_deep_allowed_2', 'opp_ppda_att_2', 'opp_scored_3', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['team_xGA_4', 'team_npxGA_1', 'team_xG_2', 'opp_deep_2', 'team_npxG_1', 'opp_ppda_allowed_att_3', 'team_ppda_allowed_att_2', 'opp_npxGA_1', 'team_ppda_def_4', 'transfers_out_3', 'ict_index_1', 'team_ppda_allowed_att_1', 'opp_deep_allowed_4', 'opp_npxGA_4', 'team_missed_1', 'team_ppda_allowed_att_4', 'opp_ppda_allowed_def_2', 'xGChain_2', 'transfers_out', 'influence_4', 'value', 'team_deep_allowed_1', 'bps_3', 'opp_ppda_allowed_def_3', 'opp_ppda_att_3', 'opp_npxG_4', 'opp_xG_1', 'bonus_2', 'opp_ppda_def_2', 'influence_1', 'opp_ppda_def_1', 'team_npxG_4', 'team_ppda_att_4', 'opp_ppda_att_1', 'opp_ppda_def_4', 'opp_npxGA_3', 'team_ppda_def_1', 'opp_xG_3', 'opp_npxG_3', 'bonus_1', 'opp_ppda_att_4', 'opp_xpts_3', 'opp_xG_4', 'opp_xpts_4', 'influence_2', 'transfers_in', 'opp_npxG_2', 'total_points_3', 'ict_index_2', 'opp_ppda_allowed_def_1', 'opp_ppda_def_3', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'team_ppda_allowed_def_1', 'opp_xpts_1', 'opp_xG_2', 'opp_deep_1', 'opp_xpts_2', 'was_home', 'team_npxG_2', 'team_ppda_att_2', 'team_ppda_allowed_def_2', 'team_ppda_def_2', 'opp_ppda_allowed_att_1', 'team_elo', 'opp_npxG_1', 'team_deep_allowed_2', 'opp_ppda_att_2', 'opp_scored_3', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['team_xGA_4', 'team_npxGA_1', 'team_xG_2', 'opp_deep_2', 'team_npxG_1', 'opp_ppda_allowed_att_3', 'opp_npxGA_1', 'team_ppda_def_4', 'transfers_out_3', 'ict_index_1', 'team_ppda_allowed_att_1', 'opp_deep_allowed_4', 'opp_npxGA_4', 'team_missed_1', 'team_ppda_allowed_att_4', 'opp_ppda_allowed_def_2', 'xGChain_2', 'transfers_out', 'influence_4', 'value', 'team_deep_allowed_1', 'bps_3', 'opp_ppda_allowed_def_3', 'opp_ppda_att_3', 'opp_npxG_4', 'opp_xG_1', 'bonus_2', 'opp_ppda_def_2', 'influence_1', 'opp_ppda_def_1', 'team_npxG_4', 'team_ppda_att_4', 'opp_ppda_att_1', 'opp_ppda_def_4', 'opp_npxGA_3', 'team_ppda_def_1', 'opp_xG_3', 'opp_npxG_3', 'bonus_1', 'opp_ppda_att_4', 'opp_xpts_3', 'opp_xG_4', 'opp_xpts_4', 'influence_2', 'transfers_in', 'opp_npxG_2', 'total_points_3', 'ict_index_2', 'opp_ppda_allowed_def_1', 'opp_ppda_def_3', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'team_ppda_allowed_def_1', 'opp_xpts_1', 'opp_xG_2', 'opp_deep_1', 'opp_xpts_2', 'was_home', 'team_npxG_2', 'team_ppda_att_2', 'team_ppda_allowed_def_2', 'team_ppda_def_2', 'opp_ppda_allowed_att_1', 'team_elo', 'opp_npxG_1', 'team_deep_allowed_2', 'opp_ppda_att_2', 'opp_scored_3', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['team_xGA_4', 'team_npxGA_1', 'team_xG_2', 'opp_deep_2', 'team_npxG_1', 'opp_ppda_allowed_att_3', 'opp_npxGA_1', 'team_ppda_def_4', 'transfers_out_3', 'ict_index_1', 'team_ppda_allowed_att_1', 'opp_npxGA_4', 'team_missed_1', 'team_ppda_allowed_att_4', 'opp_ppda_allowed_def_2', 'xGChain_2', 'transfers_out', 'influence_4', 'value', 'team_deep_allowed_1', 'bps_3', 'opp_ppda_allowed_def_3', 'opp_ppda_att_3', 'opp_npxG_4', 'opp_xG_1', 'bonus_2', 'opp_ppda_def_2', 'influence_1', 'opp_ppda_def_1', 'team_npxG_4', 'team_ppda_att_4', 'opp_ppda_att_1', 'opp_ppda_def_4', 'opp_npxGA_3', 'team_ppda_def_1', 'opp_xG_3', 'opp_npxG_3', 'bonus_1', 'opp_ppda_att_4', 'opp_xpts_3', 'opp_xG_4', 'opp_xpts_4', 'influence_2', 'transfers_in', 'opp_npxG_2', 'total_points_3', 'ict_index_2', 'opp_ppda_allowed_def_1', 'opp_ppda_def_3', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'team_ppda_allowed_def_1', 'opp_xpts_1', 'opp_xG_2', 'opp_deep_1', 'opp_xpts_2', 'was_home', 'team_npxG_2', 'team_ppda_att_2', 'team_ppda_allowed_def_2', 'team_ppda_def_2', 'opp_ppda_allowed_att_1', 'team_elo', 'opp_npxG_1', 'team_deep_allowed_2', 'opp_ppda_att_2', 'opp_scored_3', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['team_xGA_4', 'team_npxGA_1', 'team_xG_2', 'opp_deep_2', 'team_npxG_1', 'opp_ppda_allowed_att_3', 'opp_npxGA_1', 'team_ppda_def_4', 'transfers_out_3', 'ict_index_1', 'team_ppda_allowed_att_1', 'opp_npxGA_4', 'team_missed_1', 'team_ppda_allowed_att_4', 'opp_ppda_allowed_def_2', 'xGChain_2', 'transfers_out', 'influence_4', 'value', 'team_deep_allowed_1', 'bps_3', 'opp_ppda_allowed_def_3', 'opp_npxG_4', 'opp_xG_1', 'bonus_2', 'opp_ppda_def_2', 'influence_1', 'opp_ppda_def_1', 'team_npxG_4', 'team_ppda_att_4', 'opp_ppda_att_1', 'opp_ppda_def_4', 'opp_npxGA_3', 'team_ppda_def_1', 'opp_xG_3', 'opp_npxG_3', 'bonus_1', 'opp_ppda_att_4', 'opp_xpts_3', 'opp_xG_4', 'opp_xpts_4', 'influence_2', 'transfers_in', 'opp_npxG_2', 'total_points_3', 'ict_index_2', 'opp_ppda_allowed_def_1', 'opp_ppda_def_3', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'team_ppda_allowed_def_1', 'opp_xpts_1', 'opp_xG_2', 'opp_deep_1', 'opp_xpts_2', 'was_home', 'team_npxG_2', 'team_ppda_att_2', 'team_ppda_allowed_def_2', 'team_ppda_def_2', 'opp_ppda_allowed_att_1', 'team_elo', 'opp_npxG_1', 'team_deep_allowed_2', 'opp_ppda_att_2', 'opp_scored_3', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['team_xGA_4', 'team_npxGA_1', 'team_xG_2', 'opp_deep_2', 'team_npxG_1', 'opp_ppda_allowed_att_3', 'opp_npxGA_1', 'team_ppda_def_4', 'transfers_out_3', 'team_ppda_allowed_att_1', 'opp_npxGA_4', 'team_missed_1', 'team_ppda_allowed_att_4', 'opp_ppda_allowed_def_2', 'xGChain_2', 'transfers_out', 'influence_4', 'value', 'team_deep_allowed_1', 'bps_3', 'opp_ppda_allowed_def_3', 'opp_npxG_4', 'opp_xG_1', 'bonus_2', 'opp_ppda_def_2', 'influence_1', 'opp_ppda_def_1', 'team_npxG_4', 'team_ppda_att_4', 'opp_ppda_att_1', 'opp_ppda_def_4', 'opp_npxGA_3', 'team_ppda_def_1', 'opp_xG_3', 'opp_npxG_3', 'bonus_1', 'opp_ppda_att_4', 'opp_xpts_3', 'opp_xG_4', 'opp_xpts_4', 'influence_2', 'transfers_in', 'opp_npxG_2', 'total_points_3', 'ict_index_2', 'opp_ppda_allowed_def_1', 'opp_ppda_def_3', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'team_ppda_allowed_def_1', 'opp_xpts_1', 'opp_xG_2', 'opp_deep_1', 'opp_xpts_2', 'was_home', 'team_npxG_2', 'team_ppda_att_2', 'team_ppda_allowed_def_2', 'team_ppda_def_2', 'opp_ppda_allowed_att_1', 'team_elo', 'opp_npxG_1', 'team_deep_allowed_2', 'opp_ppda_att_2', 'opp_scored_3', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['team_xGA_4', 'team_npxGA_1', 'team_xG_2', 'opp_deep_2', 'team_npxG_1', 'opp_ppda_allowed_att_3', 'opp_npxGA_1', 'team_ppda_def_4', 'transfers_out_3', 'team_ppda_allowed_att_1', 'opp_npxGA_4', 'team_missed_1', 'team_ppda_allowed_att_4', 'opp_ppda_allowed_def_2', 'xGChain_2', 'transfers_out', 'influence_4', 'value', 'team_deep_allowed_1', 'bps_3', 'opp_ppda_allowed_def_3', 'opp_xG_1', 'bonus_2', 'opp_ppda_def_2', 'influence_1', 'opp_ppda_def_1', 'team_npxG_4', 'team_ppda_att_4', 'opp_ppda_att_1', 'opp_ppda_def_4', 'opp_npxGA_3', 'team_ppda_def_1', 'opp_xG_3', 'opp_npxG_3', 'bonus_1', 'opp_ppda_att_4', 'opp_xpts_3', 'opp_xG_4', 'opp_xpts_4', 'influence_2', 'transfers_in', 'opp_npxG_2', 'total_points_3', 'ict_index_2', 'opp_ppda_allowed_def_1', 'opp_ppda_def_3', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'team_ppda_allowed_def_1', 'opp_xpts_1', 'opp_xG_2', 'opp_deep_1', 'opp_xpts_2', 'was_home', 'team_npxG_2', 'team_ppda_att_2', 'team_ppda_allowed_def_2', 'team_ppda_def_2', 'opp_ppda_allowed_att_1', 'team_elo', 'opp_npxG_1', 'team_deep_allowed_2', 'opp_ppda_att_2', 'opp_scored_3', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['team_xGA_4', 'team_npxGA_1', 'team_xG_2', 'opp_deep_2', 'team_npxG_1', 'opp_npxGA_1', 'team_ppda_def_4', 'transfers_out_3', 'team_ppda_allowed_att_1', 'opp_npxGA_4', 'team_missed_1', 'team_ppda_allowed_att_4', 'opp_ppda_allowed_def_2', 'xGChain_2', 'transfers_out', 'influence_4', 'value', 'team_deep_allowed_1', 'bps_3', 'opp_ppda_allowed_def_3', 'opp_xG_1', 'bonus_2', 'opp_ppda_def_2', 'influence_1', 'opp_ppda_def_1', 'team_npxG_4', 'team_ppda_att_4', 'opp_ppda_att_1', 'opp_ppda_def_4', 'opp_npxGA_3', 'team_ppda_def_1', 'opp_xG_3', 'opp_npxG_3', 'bonus_1', 'opp_ppda_att_4', 'opp_xpts_3', 'opp_xG_4', 'opp_xpts_4', 'influence_2', 'transfers_in', 'opp_npxG_2', 'total_points_3', 'ict_index_2', 'opp_ppda_allowed_def_1', 'opp_ppda_def_3', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'team_ppda_allowed_def_1', 'opp_xpts_1', 'opp_xG_2', 'opp_deep_1', 'opp_xpts_2', 'was_home', 'team_npxG_2', 'team_ppda_att_2', 'team_ppda_allowed_def_2', 'team_ppda_def_2', 'opp_ppda_allowed_att_1', 'team_elo', 'opp_npxG_1', 'team_deep_allowed_2', 'opp_ppda_att_2', 'opp_scored_3', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['team_xGA_4', 'team_npxGA_1', 'opp_deep_2', 'team_npxG_1', 'opp_npxGA_1', 'team_ppda_def_4', 'transfers_out_3', 'team_ppda_allowed_att_1', 'opp_npxGA_4', 'team_missed_1', 'team_ppda_allowed_att_4', 'opp_ppda_allowed_def_2', 'xGChain_2', 'transfers_out', 'influence_4', 'value', 'team_deep_allowed_1', 'bps_3', 'opp_ppda_allowed_def_3', 'opp_xG_1', 'bonus_2', 'opp_ppda_def_2', 'influence_1', 'opp_ppda_def_1', 'team_npxG_4', 'team_ppda_att_4', 'opp_ppda_att_1', 'opp_ppda_def_4', 'opp_npxGA_3', 'team_ppda_def_1', 'opp_xG_3', 'opp_npxG_3', 'bonus_1', 'opp_ppda_att_4', 'opp_xpts_3', 'opp_xG_4', 'opp_xpts_4', 'influence_2', 'transfers_in', 'opp_npxG_2', 'total_points_3', 'ict_index_2', 'opp_ppda_allowed_def_1', 'opp_ppda_def_3', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'team_ppda_allowed_def_1', 'opp_xpts_1', 'opp_xG_2', 'opp_deep_1', 'opp_xpts_2', 'was_home', 'team_npxG_2', 'team_ppda_att_2', 'team_ppda_allowed_def_2', 'team_ppda_def_2', 'opp_ppda_allowed_att_1', 'team_elo', 'opp_npxG_1', 'team_deep_allowed_2', 'opp_ppda_att_2', 'opp_scored_3', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['team_xGA_4', 'team_npxGA_1', 'opp_deep_2', 'team_npxG_1', 'opp_npxGA_1', 'team_ppda_def_4', 'transfers_out_3', 'team_ppda_allowed_att_1', 'opp_npxGA_4', 'team_missed_1', 'team_ppda_allowed_att_4', 'opp_ppda_allowed_def_2', 'xGChain_2', 'transfers_out', 'influence_4', 'value', 'team_deep_allowed_1', 'bps_3', 'opp_xG_1', 'bonus_2', 'opp_ppda_def_2', 'influence_1', 'opp_ppda_def_1', 'team_npxG_4', 'team_ppda_att_4', 'opp_ppda_att_1', 'opp_ppda_def_4', 'opp_npxGA_3', 'team_ppda_def_1', 'opp_xG_3', 'opp_npxG_3', 'bonus_1', 'opp_ppda_att_4', 'opp_xpts_3', 'opp_xG_4', 'opp_xpts_4', 'influence_2', 'transfers_in', 'opp_npxG_2', 'total_points_3', 'ict_index_2', 'opp_ppda_allowed_def_1', 'opp_ppda_def_3', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'team_ppda_allowed_def_1', 'opp_xpts_1', 'opp_xG_2', 'opp_deep_1', 'opp_xpts_2', 'was_home', 'team_npxG_2', 'team_ppda_att_2', 'team_ppda_allowed_def_2', 'team_ppda_def_2', 'opp_ppda_allowed_att_1', 'team_elo', 'opp_npxG_1', 'team_deep_allowed_2', 'opp_ppda_att_2', 'opp_scored_3', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['team_xGA_4', 'team_npxGA_1', 'opp_deep_2', 'team_npxG_1', 'opp_npxGA_1', 'team_ppda_def_4', 'transfers_out_3', 'team_ppda_allowed_att_1', 'opp_npxGA_4', 'team_missed_1', 'team_ppda_allowed_att_4', 'opp_ppda_allowed_def_2', 'xGChain_2', 'transfers_out', 'influence_4', 'value', 'team_deep_allowed_1', 'bps_3', 'opp_xG_1', 'bonus_2', 'opp_ppda_def_2', 'influence_1', 'opp_ppda_def_1', 'team_npxG_4', 'team_ppda_att_4', 'opp_ppda_att_1', 'opp_ppda_def_4', 'opp_npxGA_3', 'team_ppda_def_1', 'opp_xG_3', 'opp_npxG_3', 'bonus_1', 'opp_ppda_att_4', 'opp_xG_4', 'opp_xpts_4', 'influence_2', 'transfers_in', 'opp_npxG_2', 'total_points_3', 'ict_index_2', 'opp_ppda_allowed_def_1', 'opp_ppda_def_3', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'team_ppda_allowed_def_1', 'opp_xpts_1', 'opp_xG_2', 'opp_deep_1', 'opp_xpts_2', 'was_home', 'team_npxG_2', 'team_ppda_att_2', 'team_ppda_allowed_def_2', 'team_ppda_def_2', 'opp_ppda_allowed_att_1', 'team_elo', 'opp_npxG_1', 'team_deep_allowed_2', 'opp_ppda_att_2', 'opp_scored_3', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['team_xGA_4', 'team_npxGA_1', 'opp_deep_2', 'team_npxG_1', 'opp_npxGA_1', 'team_ppda_def_4', 'transfers_out_3', 'team_ppda_allowed_att_1', 'opp_npxGA_4', 'team_missed_1', 'team_ppda_allowed_att_4', 'opp_ppda_allowed_def_2', 'xGChain_2', 'transfers_out', 'influence_4', 'value', 'team_deep_allowed_1', 'bps_3', 'bonus_2', 'opp_ppda_def_2', 'influence_1', 'opp_ppda_def_1', 'team_npxG_4', 'team_ppda_att_4', 'opp_ppda_att_1', 'opp_ppda_def_4', 'opp_npxGA_3', 'team_ppda_def_1', 'opp_xG_3', 'opp_npxG_3', 'bonus_1', 'opp_ppda_att_4', 'opp_xG_4', 'opp_xpts_4', 'influence_2', 'transfers_in', 'opp_npxG_2', 'total_points_3', 'ict_index_2', 'opp_ppda_allowed_def_1', 'opp_ppda_def_3', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'team_ppda_allowed_def_1', 'opp_xpts_1', 'opp_xG_2', 'opp_deep_1', 'opp_xpts_2', 'was_home', 'team_npxG_2', 'team_ppda_att_2', 'team_ppda_allowed_def_2', 'team_ppda_def_2', 'opp_ppda_allowed_att_1', 'team_elo', 'opp_npxG_1', 'team_deep_allowed_2', 'opp_ppda_att_2', 'opp_scored_3', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['team_xGA_4', 'team_npxGA_1', 'opp_deep_2', 'team_npxG_1', 'opp_npxGA_1', 'team_ppda_def_4', 'transfers_out_3', 'team_ppda_allowed_att_1', 'opp_npxGA_4', 'team_missed_1', 'team_ppda_allowed_att_4', 'opp_ppda_allowed_def_2', 'xGChain_2', 'transfers_out', 'influence_4', 'value', 'team_deep_allowed_1', 'bps_3', 'opp_ppda_def_2', 'influence_1', 'opp_ppda_def_1', 'team_npxG_4', 'team_ppda_att_4', 'opp_ppda_att_1', 'opp_ppda_def_4', 'opp_npxGA_3', 'team_ppda_def_1', 'opp_xG_3', 'opp_npxG_3', 'bonus_1', 'opp_ppda_att_4', 'opp_xG_4', 'opp_xpts_4', 'influence_2', 'transfers_in', 'opp_npxG_2', 'total_points_3', 'ict_index_2', 'opp_ppda_allowed_def_1', 'opp_ppda_def_3', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'team_ppda_allowed_def_1', 'opp_xpts_1', 'opp_xG_2', 'opp_deep_1', 'opp_xpts_2', 'was_home', 'team_npxG_2', 'team_ppda_att_2', 'team_ppda_allowed_def_2', 'team_ppda_def_2', 'opp_ppda_allowed_att_1', 'team_elo', 'opp_npxG_1', 'team_deep_allowed_2', 'opp_ppda_att_2', 'opp_scored_3', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['team_xGA_4', 'team_npxGA_1', 'opp_deep_2', 'team_npxG_1', 'opp_npxGA_1', 'team_ppda_def_4', 'transfers_out_3', 'team_ppda_allowed_att_1', 'opp_npxGA_4', 'team_missed_1', 'team_ppda_allowed_att_4', 'xGChain_2', 'transfers_out', 'influence_4', 'value', 'team_deep_allowed_1', 'bps_3', 'opp_ppda_def_2', 'influence_1', 'opp_ppda_def_1', 'team_npxG_4', 'team_ppda_att_4', 'opp_ppda_att_1', 'opp_ppda_def_4', 'opp_npxGA_3', 'team_ppda_def_1', 'opp_xG_3', 'opp_npxG_3', 'bonus_1', 'opp_ppda_att_4', 'opp_xG_4', 'opp_xpts_4', 'influence_2', 'transfers_in', 'opp_npxG_2', 'total_points_3', 'ict_index_2', 'opp_ppda_allowed_def_1', 'opp_ppda_def_3', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'team_ppda_allowed_def_1', 'opp_xpts_1', 'opp_xG_2', 'opp_deep_1', 'opp_xpts_2', 'was_home', 'team_npxG_2', 'team_ppda_att_2', 'team_ppda_allowed_def_2', 'team_ppda_def_2', 'opp_ppda_allowed_att_1', 'team_elo', 'opp_npxG_1', 'team_deep_allowed_2', 'opp_ppda_att_2', 'opp_scored_3', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['team_xGA_4', 'team_npxGA_1', 'opp_deep_2', 'team_npxG_1', 'opp_npxGA_1', 'transfers_out_3', 'team_ppda_allowed_att_1', 'opp_npxGA_4', 'team_missed_1', 'team_ppda_allowed_att_4', 'xGChain_2', 'transfers_out', 'influence_4', 'value', 'team_deep_allowed_1', 'bps_3', 'opp_ppda_def_2', 'influence_1', 'opp_ppda_def_1', 'team_npxG_4', 'team_ppda_att_4', 'opp_ppda_att_1', 'opp_ppda_def_4', 'opp_npxGA_3', 'team_ppda_def_1', 'opp_xG_3', 'opp_npxG_3', 'bonus_1', 'opp_ppda_att_4', 'opp_xG_4', 'opp_xpts_4', 'influence_2', 'transfers_in', 'opp_npxG_2', 'total_points_3', 'ict_index_2', 'opp_ppda_allowed_def_1', 'opp_ppda_def_3', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'team_ppda_allowed_def_1', 'opp_xpts_1', 'opp_xG_2', 'opp_deep_1', 'opp_xpts_2', 'was_home', 'team_npxG_2', 'team_ppda_att_2', 'team_ppda_allowed_def_2', 'team_ppda_def_2', 'opp_ppda_allowed_att_1', 'team_elo', 'opp_npxG_1', 'team_deep_allowed_2', 'opp_ppda_att_2', 'opp_scored_3', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['team_xGA_4', 'team_npxGA_1', 'opp_deep_2', 'team_npxG_1', 'opp_npxGA_1', 'transfers_out_3', 'team_ppda_allowed_att_1', 'opp_npxGA_4', 'team_missed_1', 'team_ppda_allowed_att_4', 'xGChain_2', 'transfers_out', 'influence_4', 'value', 'bps_3', 'opp_ppda_def_2', 'influence_1', 'opp_ppda_def_1', 'team_npxG_4', 'team_ppda_att_4', 'opp_ppda_att_1', 'opp_ppda_def_4', 'opp_npxGA_3', 'team_ppda_def_1', 'opp_xG_3', 'opp_npxG_3', 'bonus_1', 'opp_ppda_att_4', 'opp_xG_4', 'opp_xpts_4', 'influence_2', 'transfers_in', 'opp_npxG_2', 'total_points_3', 'ict_index_2', 'opp_ppda_allowed_def_1', 'opp_ppda_def_3', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'team_ppda_allowed_def_1', 'opp_xpts_1', 'opp_xG_2', 'opp_deep_1', 'opp_xpts_2', 'was_home', 'team_npxG_2', 'team_ppda_att_2', 'team_ppda_allowed_def_2', 'team_ppda_def_2', 'opp_ppda_allowed_att_1', 'team_elo', 'opp_npxG_1', 'team_deep_allowed_2', 'opp_ppda_att_2', 'opp_scored_3', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['team_xGA_4', 'team_npxGA_1', 'team_npxG_1', 'opp_npxGA_1', 'transfers_out_3', 'team_ppda_allowed_att_1', 'opp_npxGA_4', 'team_missed_1', 'team_ppda_allowed_att_4', 'xGChain_2', 'transfers_out', 'influence_4', 'value', 'bps_3', 'opp_ppda_def_2', 'influence_1', 'opp_ppda_def_1', 'team_npxG_4', 'team_ppda_att_4', 'opp_ppda_att_1', 'opp_ppda_def_4', 'opp_npxGA_3', 'team_ppda_def_1', 'opp_xG_3', 'opp_npxG_3', 'bonus_1', 'opp_ppda_att_4', 'opp_xG_4', 'opp_xpts_4', 'influence_2', 'transfers_in', 'opp_npxG_2', 'total_points_3', 'ict_index_2', 'opp_ppda_allowed_def_1', 'opp_ppda_def_3', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'team_ppda_allowed_def_1', 'opp_xpts_1', 'opp_xG_2', 'opp_deep_1', 'opp_xpts_2', 'was_home', 'team_npxG_2', 'team_ppda_att_2', 'team_ppda_allowed_def_2', 'team_ppda_def_2', 'opp_ppda_allowed_att_1', 'team_elo', 'opp_npxG_1', 'team_deep_allowed_2', 'opp_ppda_att_2', 'opp_scored_3', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['team_xGA_4', 'team_npxGA_1', 'team_npxG_1', 'opp_npxGA_1', 'transfers_out_3', 'team_ppda_allowed_att_1', 'opp_npxGA_4', 'team_missed_1', 'team_ppda_allowed_att_4', 'xGChain_2', 'transfers_out', 'influence_4', 'value', 'bps_3', 'influence_1', 'opp_ppda_def_1', 'team_npxG_4', 'team_ppda_att_4', 'opp_ppda_att_1', 'opp_ppda_def_4', 'opp_npxGA_3', 'team_ppda_def_1', 'opp_xG_3', 'opp_npxG_3', 'bonus_1', 'opp_ppda_att_4', 'opp_xG_4', 'opp_xpts_4', 'influence_2', 'transfers_in', 'opp_npxG_2', 'total_points_3', 'ict_index_2', 'opp_ppda_allowed_def_1', 'opp_ppda_def_3', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'team_ppda_allowed_def_1', 'opp_xpts_1', 'opp_xG_2', 'opp_deep_1', 'opp_xpts_2', 'was_home', 'team_npxG_2', 'team_ppda_att_2', 'team_ppda_allowed_def_2', 'team_ppda_def_2', 'opp_ppda_allowed_att_1', 'team_elo', 'opp_npxG_1', 'team_deep_allowed_2', 'opp_ppda_att_2', 'opp_scored_3', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['team_xGA_4', 'team_npxGA_1', 'team_npxG_1', 'opp_npxGA_1', 'transfers_out_3', 'team_ppda_allowed_att_1', 'opp_npxGA_4', 'team_missed_1', 'team_ppda_allowed_att_4', 'xGChain_2', 'transfers_out', 'influence_4', 'value', 'influence_1', 'opp_ppda_def_1', 'team_npxG_4', 'team_ppda_att_4', 'opp_ppda_att_1', 'opp_ppda_def_4', 'opp_npxGA_3', 'team_ppda_def_1', 'opp_xG_3', 'opp_npxG_3', 'bonus_1', 'opp_ppda_att_4', 'opp_xG_4', 'opp_xpts_4', 'influence_2', 'transfers_in', 'opp_npxG_2', 'total_points_3', 'ict_index_2', 'opp_ppda_allowed_def_1', 'opp_ppda_def_3', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'team_ppda_allowed_def_1', 'opp_xpts_1', 'opp_xG_2', 'opp_deep_1', 'opp_xpts_2', 'was_home', 'team_npxG_2', 'team_ppda_att_2', 'team_ppda_allowed_def_2', 'team_ppda_def_2', 'opp_ppda_allowed_att_1', 'team_elo', 'opp_npxG_1', 'team_deep_allowed_2', 'opp_ppda_att_2', 'opp_scored_3', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['team_xGA_4', 'team_npxGA_1', 'team_npxG_1', 'opp_npxGA_1', 'transfers_out_3', 'team_ppda_allowed_att_1', 'opp_npxGA_4', 'team_missed_1', 'team_ppda_allowed_att_4', 'xGChain_2', 'transfers_out', 'influence_4', 'value', 'influence_1', 'opp_ppda_def_1', 'team_npxG_4', 'team_ppda_att_4', 'opp_ppda_att_1', 'opp_ppda_def_4', 'opp_npxGA_3', 'team_ppda_def_1', 'opp_xG_3', 'opp_npxG_3', 'bonus_1', 'opp_ppda_att_4', 'opp_xG_4', 'opp_xpts_4', 'transfers_in', 'opp_npxG_2', 'total_points_3', 'ict_index_2', 'opp_ppda_allowed_def_1', 'opp_ppda_def_3', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'team_ppda_allowed_def_1', 'opp_xpts_1', 'opp_xG_2', 'opp_deep_1', 'opp_xpts_2', 'was_home', 'team_npxG_2', 'team_ppda_att_2', 'team_ppda_allowed_def_2', 'team_ppda_def_2', 'opp_ppda_allowed_att_1', 'team_elo', 'opp_npxG_1', 'team_deep_allowed_2', 'opp_ppda_att_2', 'opp_scored_3', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['team_xGA_4', 'team_npxGA_1', 'team_npxG_1', 'opp_npxGA_1', 'transfers_out_3', 'team_ppda_allowed_att_1', 'opp_npxGA_4', 'team_missed_1', 'team_ppda_allowed_att_4', 'transfers_out', 'influence_4', 'value', 'influence_1', 'opp_ppda_def_1', 'team_npxG_4', 'team_ppda_att_4', 'opp_ppda_att_1', 'opp_ppda_def_4', 'opp_npxGA_3', 'team_ppda_def_1', 'opp_xG_3', 'opp_npxG_3', 'bonus_1', 'opp_ppda_att_4', 'opp_xG_4', 'opp_xpts_4', 'transfers_in', 'opp_npxG_2', 'total_points_3', 'ict_index_2', 'opp_ppda_allowed_def_1', 'opp_ppda_def_3', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'team_ppda_allowed_def_1', 'opp_xpts_1', 'opp_xG_2', 'opp_deep_1', 'opp_xpts_2', 'was_home', 'team_npxG_2', 'team_ppda_att_2', 'team_ppda_allowed_def_2', 'team_ppda_def_2', 'opp_ppda_allowed_att_1', 'team_elo', 'opp_npxG_1', 'team_deep_allowed_2', 'opp_ppda_att_2', 'opp_scored_3', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['team_xGA_4', 'team_npxGA_1', 'team_npxG_1', 'opp_npxGA_1', 'transfers_out_3', 'team_ppda_allowed_att_1', 'opp_npxGA_4', 'team_missed_1', 'transfers_out', 'influence_4', 'value', 'influence_1', 'opp_ppda_def_1', 'team_npxG_4', 'team_ppda_att_4', 'opp_ppda_att_1', 'opp_ppda_def_4', 'opp_npxGA_3', 'team_ppda_def_1', 'opp_xG_3', 'opp_npxG_3', 'bonus_1', 'opp_ppda_att_4', 'opp_xG_4', 'opp_xpts_4', 'transfers_in', 'opp_npxG_2', 'total_points_3', 'ict_index_2', 'opp_ppda_allowed_def_1', 'opp_ppda_def_3', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'team_ppda_allowed_def_1', 'opp_xpts_1', 'opp_xG_2', 'opp_deep_1', 'opp_xpts_2', 'was_home', 'team_npxG_2', 'team_ppda_att_2', 'team_ppda_allowed_def_2', 'team_ppda_def_2', 'opp_ppda_allowed_att_1', 'team_elo', 'opp_npxG_1', 'team_deep_allowed_2', 'opp_ppda_att_2', 'opp_scored_3', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['team_xGA_4', 'team_npxGA_1', 'team_npxG_1', 'opp_npxGA_1', 'transfers_out_3', 'team_ppda_allowed_att_1', 'opp_npxGA_4', 'team_missed_1', 'transfers_out', 'influence_4', 'value', 'influence_1', 'opp_ppda_def_1', 'team_npxG_4', 'team_ppda_att_4', 'opp_ppda_att_1', 'opp_ppda_def_4', 'opp_npxGA_3', 'opp_xG_3', 'opp_npxG_3', 'bonus_1', 'opp_ppda_att_4', 'opp_xG_4', 'opp_xpts_4', 'transfers_in', 'opp_npxG_2', 'total_points_3', 'ict_index_2', 'opp_ppda_allowed_def_1', 'opp_ppda_def_3', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'team_ppda_allowed_def_1', 'opp_xpts_1', 'opp_xG_2', 'opp_deep_1', 'opp_xpts_2', 'was_home', 'team_npxG_2', 'team_ppda_att_2', 'team_ppda_allowed_def_2', 'team_ppda_def_2', 'opp_ppda_allowed_att_1', 'team_elo', 'opp_npxG_1', 'team_deep_allowed_2', 'opp_ppda_att_2', 'opp_scored_3', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['team_xGA_4', 'team_npxGA_1', 'team_npxG_1', 'opp_npxGA_1', 'transfers_out_3', 'team_ppda_allowed_att_1', 'opp_npxGA_4', 'team_missed_1', 'transfers_out', 'influence_4', 'value', 'influence_1', 'opp_ppda_def_1', 'team_npxG_4', 'team_ppda_att_4', 'opp_ppda_att_1', 'opp_ppda_def_4', 'opp_npxGA_3', 'opp_xG_3', 'opp_npxG_3', 'bonus_1', 'opp_ppda_att_4', 'opp_xG_4', 'opp_xpts_4', 'transfers_in', 'total_points_3', 'ict_index_2', 'opp_ppda_allowed_def_1', 'opp_ppda_def_3', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'team_ppda_allowed_def_1', 'opp_xpts_1', 'opp_xG_2', 'opp_deep_1', 'opp_xpts_2', 'was_home', 'team_npxG_2', 'team_ppda_att_2', 'team_ppda_allowed_def_2', 'team_ppda_def_2', 'opp_ppda_allowed_att_1', 'team_elo', 'opp_npxG_1', 'team_deep_allowed_2', 'opp_ppda_att_2', 'opp_scored_3', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['team_xGA_4', 'team_npxGA_1', 'team_npxG_1', 'opp_npxGA_1', 'transfers_out_3', 'team_ppda_allowed_att_1', 'opp_npxGA_4', 'team_missed_1', 'transfers_out', 'influence_4', 'value', 'influence_1', 'opp_ppda_def_1', 'team_npxG_4', 'team_ppda_att_4', 'opp_ppda_att_1', 'opp_ppda_def_4', 'opp_npxGA_3', 'opp_xG_3', 'bonus_1', 'opp_ppda_att_4', 'opp_xG_4', 'opp_xpts_4', 'transfers_in', 'total_points_3', 'ict_index_2', 'opp_ppda_allowed_def_1', 'opp_ppda_def_3', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'team_ppda_allowed_def_1', 'opp_xpts_1', 'opp_xG_2', 'opp_deep_1', 'opp_xpts_2', 'was_home', 'team_npxG_2', 'team_ppda_att_2', 'team_ppda_allowed_def_2', 'team_ppda_def_2', 'opp_ppda_allowed_att_1', 'team_elo', 'opp_npxG_1', 'team_deep_allowed_2', 'opp_ppda_att_2', 'opp_scored_3', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['team_xGA_4', 'team_npxGA_1', 'team_npxG_1', 'transfers_out_3', 'team_ppda_allowed_att_1', 'opp_npxGA_4', 'team_missed_1', 'transfers_out', 'influence_4', 'value', 'influence_1', 'opp_ppda_def_1', 'team_npxG_4', 'team_ppda_att_4', 'opp_ppda_att_1', 'opp_ppda_def_4', 'opp_npxGA_3', 'opp_xG_3', 'bonus_1', 'opp_ppda_att_4', 'opp_xG_4', 'opp_xpts_4', 'transfers_in', 'total_points_3', 'ict_index_2', 'opp_ppda_allowed_def_1', 'opp_ppda_def_3', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'team_ppda_allowed_def_1', 'opp_xpts_1', 'opp_xG_2', 'opp_deep_1', 'opp_xpts_2', 'was_home', 'team_npxG_2', 'team_ppda_att_2', 'team_ppda_allowed_def_2', 'team_ppda_def_2', 'opp_ppda_allowed_att_1', 'team_elo', 'opp_npxG_1', 'team_deep_allowed_2', 'opp_ppda_att_2', 'opp_scored_3', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['team_xGA_4', 'team_npxGA_1', 'team_npxG_1', 'transfers_out_3', 'team_ppda_allowed_att_1', 'opp_npxGA_4', 'team_missed_1', 'transfers_out', 'influence_4', 'value', 'influence_1', 'opp_ppda_def_1', 'team_npxG_4', 'team_ppda_att_4', 'opp_ppda_att_1', 'opp_npxGA_3', 'opp_xG_3', 'bonus_1', 'opp_ppda_att_4', 'opp_xG_4', 'opp_xpts_4', 'transfers_in', 'total_points_3', 'ict_index_2', 'opp_ppda_allowed_def_1', 'opp_ppda_def_3', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'team_ppda_allowed_def_1', 'opp_xpts_1', 'opp_xG_2', 'opp_deep_1', 'opp_xpts_2', 'was_home', 'team_npxG_2', 'team_ppda_att_2', 'team_ppda_allowed_def_2', 'team_ppda_def_2', 'opp_ppda_allowed_att_1', 'team_elo', 'opp_npxG_1', 'team_deep_allowed_2', 'opp_ppda_att_2', 'opp_scored_3', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['team_xGA_4', 'team_npxGA_1', 'team_npxG_1', 'transfers_out_3', 'team_ppda_allowed_att_1', 'opp_npxGA_4', 'team_missed_1', 'transfers_out', 'influence_4', 'value', 'influence_1', 'opp_ppda_def_1', 'team_npxG_4', 'team_ppda_att_4', 'opp_ppda_att_1', 'opp_npxGA_3', 'opp_xG_3', 'opp_ppda_att_4', 'opp_xG_4', 'opp_xpts_4', 'transfers_in', 'total_points_3', 'ict_index_2', 'opp_ppda_allowed_def_1', 'opp_ppda_def_3', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'team_ppda_allowed_def_1', 'opp_xpts_1', 'opp_xG_2', 'opp_deep_1', 'opp_xpts_2', 'was_home', 'team_npxG_2', 'team_ppda_att_2', 'team_ppda_allowed_def_2', 'team_ppda_def_2', 'opp_ppda_allowed_att_1', 'team_elo', 'opp_npxG_1', 'team_deep_allowed_2', 'opp_ppda_att_2', 'opp_scored_3', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['team_xGA_4', 'team_npxGA_1', 'team_npxG_1', 'transfers_out_3', 'team_ppda_allowed_att_1', 'opp_npxGA_4', 'team_missed_1', 'transfers_out', 'influence_4', 'value', 'influence_1', 'opp_ppda_def_1', 'team_npxG_4', 'team_ppda_att_4', 'opp_npxGA_3', 'opp_xG_3', 'opp_ppda_att_4', 'opp_xG_4', 'opp_xpts_4', 'transfers_in', 'total_points_3', 'ict_index_2', 'opp_ppda_allowed_def_1', 'opp_ppda_def_3', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'team_ppda_allowed_def_1', 'opp_xpts_1', 'opp_xG_2', 'opp_deep_1', 'opp_xpts_2', 'was_home', 'team_npxG_2', 'team_ppda_att_2', 'team_ppda_allowed_def_2', 'team_ppda_def_2', 'opp_ppda_allowed_att_1', 'team_elo', 'opp_npxG_1', 'team_deep_allowed_2', 'opp_ppda_att_2', 'opp_scored_3', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['team_xGA_4', 'team_npxGA_1', 'team_npxG_1', 'transfers_out_3', 'team_ppda_allowed_att_1', 'opp_npxGA_4', 'team_missed_1', 'transfers_out', 'influence_4', 'value', 'influence_1', 'opp_ppda_def_1', 'team_npxG_4', 'opp_npxGA_3', 'opp_xG_3', 'opp_ppda_att_4', 'opp_xG_4', 'opp_xpts_4', 'transfers_in', 'total_points_3', 'ict_index_2', 'opp_ppda_allowed_def_1', 'opp_ppda_def_3', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'team_ppda_allowed_def_1', 'opp_xpts_1', 'opp_xG_2', 'opp_deep_1', 'opp_xpts_2', 'was_home', 'team_npxG_2', 'team_ppda_att_2', 'team_ppda_allowed_def_2', 'team_ppda_def_2', 'opp_ppda_allowed_att_1', 'team_elo', 'opp_npxG_1', 'team_deep_allowed_2', 'opp_ppda_att_2', 'opp_scored_3', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['team_xGA_4', 'team_npxGA_1', 'team_npxG_1', 'team_ppda_allowed_att_1', 'opp_npxGA_4', 'team_missed_1', 'transfers_out', 'influence_4', 'value', 'influence_1', 'opp_ppda_def_1', 'team_npxG_4', 'opp_npxGA_3', 'opp_xG_3', 'opp_ppda_att_4', 'opp_xG_4', 'opp_xpts_4', 'transfers_in', 'total_points_3', 'ict_index_2', 'opp_ppda_allowed_def_1', 'opp_ppda_def_3', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'team_ppda_allowed_def_1', 'opp_xpts_1', 'opp_xG_2', 'opp_deep_1', 'opp_xpts_2', 'was_home', 'team_npxG_2', 'team_ppda_att_2', 'team_ppda_allowed_def_2', 'team_ppda_def_2', 'opp_ppda_allowed_att_1', 'team_elo', 'opp_npxG_1', 'team_deep_allowed_2', 'opp_ppda_att_2', 'opp_scored_3', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['team_xGA_4', 'team_npxG_1', 'team_ppda_allowed_att_1', 'opp_npxGA_4', 'team_missed_1', 'transfers_out', 'influence_4', 'value', 'influence_1', 'opp_ppda_def_1', 'team_npxG_4', 'opp_npxGA_3', 'opp_xG_3', 'opp_ppda_att_4', 'opp_xG_4', 'opp_xpts_4', 'transfers_in', 'total_points_3', 'ict_index_2', 'opp_ppda_allowed_def_1', 'opp_ppda_def_3', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'team_ppda_allowed_def_1', 'opp_xpts_1', 'opp_xG_2', 'opp_deep_1', 'opp_xpts_2', 'was_home', 'team_npxG_2', 'team_ppda_att_2', 'team_ppda_allowed_def_2', 'team_ppda_def_2', 'opp_ppda_allowed_att_1', 'team_elo', 'opp_npxG_1', 'team_deep_allowed_2', 'opp_ppda_att_2', 'opp_scored_3', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['team_xGA_4', 'team_npxG_1', 'team_ppda_allowed_att_1', 'opp_npxGA_4', 'team_missed_1', 'influence_4', 'value', 'influence_1', 'opp_ppda_def_1', 'team_npxG_4', 'opp_npxGA_3', 'opp_xG_3', 'opp_ppda_att_4', 'opp_xG_4', 'opp_xpts_4', 'transfers_in', 'total_points_3', 'ict_index_2', 'opp_ppda_allowed_def_1', 'opp_ppda_def_3', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'team_ppda_allowed_def_1', 'opp_xpts_1', 'opp_xG_2', 'opp_deep_1', 'opp_xpts_2', 'was_home', 'team_npxG_2', 'team_ppda_att_2', 'team_ppda_allowed_def_2', 'team_ppda_def_2', 'opp_ppda_allowed_att_1', 'team_elo', 'opp_npxG_1', 'team_deep_allowed_2', 'opp_ppda_att_2', 'opp_scored_3', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['team_xGA_4', 'team_npxG_1', 'team_ppda_allowed_att_1', 'opp_npxGA_4', 'team_missed_1', 'influence_4', 'value', 'influence_1', 'opp_ppda_def_1', 'team_npxG_4', 'opp_npxGA_3', 'opp_xG_3', 'opp_ppda_att_4', 'opp_xG_4', 'opp_xpts_4', 'transfers_in', 'total_points_3', 'ict_index_2', 'opp_ppda_allowed_def_1', 'opp_ppda_def_3', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'team_ppda_allowed_def_1', 'opp_xpts_1', 'opp_xG_2', 'opp_deep_1', 'was_home', 'team_npxG_2', 'team_ppda_att_2', 'team_ppda_allowed_def_2', 'team_ppda_def_2', 'opp_ppda_allowed_att_1', 'team_elo', 'opp_npxG_1', 'team_deep_allowed_2', 'opp_ppda_att_2', 'opp_scored_3', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['team_xGA_4', 'team_npxG_1', 'opp_npxGA_4', 'team_missed_1', 'influence_4', 'value', 'influence_1', 'opp_ppda_def_1', 'team_npxG_4', 'opp_npxGA_3', 'opp_xG_3', 'opp_ppda_att_4', 'opp_xG_4', 'opp_xpts_4', 'transfers_in', 'total_points_3', 'ict_index_2', 'opp_ppda_allowed_def_1', 'opp_ppda_def_3', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'team_ppda_allowed_def_1', 'opp_xpts_1', 'opp_xG_2', 'opp_deep_1', 'was_home', 'team_npxG_2', 'team_ppda_att_2', 'team_ppda_allowed_def_2', 'team_ppda_def_2', 'opp_ppda_allowed_att_1', 'team_elo', 'opp_npxG_1', 'team_deep_allowed_2', 'opp_ppda_att_2', 'opp_scored_3', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['team_xGA_4', 'opp_npxGA_4', 'team_missed_1', 'influence_4', 'value', 'influence_1', 'opp_ppda_def_1', 'team_npxG_4', 'opp_npxGA_3', 'opp_xG_3', 'opp_ppda_att_4', 'opp_xG_4', 'opp_xpts_4', 'transfers_in', 'total_points_3', 'ict_index_2', 'opp_ppda_allowed_def_1', 'opp_ppda_def_3', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'team_ppda_allowed_def_1', 'opp_xpts_1', 'opp_xG_2', 'opp_deep_1', 'was_home', 'team_npxG_2', 'team_ppda_att_2', 'team_ppda_allowed_def_2', 'team_ppda_def_2', 'opp_ppda_allowed_att_1', 'team_elo', 'opp_npxG_1', 'team_deep_allowed_2', 'opp_ppda_att_2', 'opp_scored_3', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['team_xGA_4', 'opp_npxGA_4', 'team_missed_1', 'influence_4', 'value', 'opp_ppda_def_1', 'team_npxG_4', 'opp_npxGA_3', 'opp_xG_3', 'opp_ppda_att_4', 'opp_xG_4', 'opp_xpts_4', 'transfers_in', 'total_points_3', 'ict_index_2', 'opp_ppda_allowed_def_1', 'opp_ppda_def_3', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'team_ppda_allowed_def_1', 'opp_xpts_1', 'opp_xG_2', 'opp_deep_1', 'was_home', 'team_npxG_2', 'team_ppda_att_2', 'team_ppda_allowed_def_2', 'team_ppda_def_2', 'opp_ppda_allowed_att_1', 'team_elo', 'opp_npxG_1', 'team_deep_allowed_2', 'opp_ppda_att_2', 'opp_scored_3', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['opp_npxGA_4', 'team_missed_1', 'influence_4', 'value', 'opp_ppda_def_1', 'team_npxG_4', 'opp_npxGA_3', 'opp_xG_3', 'opp_ppda_att_4', 'opp_xG_4', 'opp_xpts_4', 'transfers_in', 'total_points_3', 'ict_index_2', 'opp_ppda_allowed_def_1', 'opp_ppda_def_3', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'team_ppda_allowed_def_1', 'opp_xpts_1', 'opp_xG_2', 'opp_deep_1', 'was_home', 'team_npxG_2', 'team_ppda_att_2', 'team_ppda_allowed_def_2', 'team_ppda_def_2', 'opp_ppda_allowed_att_1', 'team_elo', 'opp_npxG_1', 'team_deep_allowed_2', 'opp_ppda_att_2', 'opp_scored_3', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['opp_npxGA_4', 'team_missed_1', 'influence_4', 'value', 'opp_ppda_def_1', 'team_npxG_4', 'opp_npxGA_3', 'opp_xG_3', 'opp_ppda_att_4', 'opp_xG_4', 'opp_xpts_4', 'transfers_in', 'total_points_3', 'ict_index_2', 'opp_ppda_allowed_def_1', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'team_ppda_allowed_def_1', 'opp_xpts_1', 'opp_xG_2', 'opp_deep_1', 'was_home', 'team_npxG_2', 'team_ppda_att_2', 'team_ppda_allowed_def_2', 'team_ppda_def_2', 'opp_ppda_allowed_att_1', 'team_elo', 'opp_npxG_1', 'team_deep_allowed_2', 'opp_ppda_att_2', 'opp_scored_3', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['team_missed_1', 'influence_4', 'value', 'opp_ppda_def_1', 'team_npxG_4', 'opp_npxGA_3', 'opp_xG_3', 'opp_ppda_att_4', 'opp_xG_4', 'opp_xpts_4', 'transfers_in', 'total_points_3', 'ict_index_2', 'opp_ppda_allowed_def_1', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'team_ppda_allowed_def_1', 'opp_xpts_1', 'opp_xG_2', 'opp_deep_1', 'was_home', 'team_npxG_2', 'team_ppda_att_2', 'team_ppda_allowed_def_2', 'team_ppda_def_2', 'opp_ppda_allowed_att_1', 'team_elo', 'opp_npxG_1', 'team_deep_allowed_2', 'opp_ppda_att_2', 'opp_scored_3', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['team_missed_1', 'influence_4', 'value', 'opp_ppda_def_1', 'team_npxG_4', 'opp_npxGA_3', 'opp_xG_3', 'opp_ppda_att_4', 'opp_xG_4', 'opp_xpts_4', 'transfers_in', 'total_points_3', 'ict_index_2', 'opp_ppda_allowed_def_1', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'team_ppda_allowed_def_1', 'opp_xpts_1', 'opp_xG_2', 'opp_deep_1', 'was_home', 'team_npxG_2', 'team_ppda_att_2', 'team_ppda_allowed_def_2', 'team_ppda_def_2', 'team_elo', 'opp_npxG_1', 'team_deep_allowed_2', 'opp_ppda_att_2', 'opp_scored_3', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['team_missed_1', 'influence_4', 'value', 'opp_ppda_def_1', 'team_npxG_4', 'opp_npxGA_3', 'opp_xG_3', 'opp_ppda_att_4', 'opp_xG_4', 'opp_xpts_4', 'transfers_in', 'total_points_3', 'ict_index_2', 'opp_ppda_allowed_def_1', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'opp_xpts_1', 'opp_xG_2', 'opp_deep_1', 'was_home', 'team_npxG_2', 'team_ppda_att_2', 'team_ppda_allowed_def_2', 'team_ppda_def_2', 'team_elo', 'opp_npxG_1', 'team_deep_allowed_2', 'opp_ppda_att_2', 'opp_scored_3', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['team_missed_1', 'influence_4', 'value', 'opp_ppda_def_1', 'team_npxG_4', 'opp_npxGA_3', 'opp_xG_3', 'opp_ppda_att_4', 'opp_xG_4', 'opp_xpts_4', 'transfers_in', 'ict_index_2', 'opp_ppda_allowed_def_1', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'opp_xpts_1', 'opp_xG_2', 'opp_deep_1', 'was_home', 'team_npxG_2', 'team_ppda_att_2', 'team_ppda_allowed_def_2', 'team_ppda_def_2', 'team_elo', 'opp_npxG_1', 'team_deep_allowed_2', 'opp_ppda_att_2', 'opp_scored_3', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['team_missed_1', 'influence_4', 'value', 'opp_ppda_def_1', 'team_npxG_4', 'opp_npxGA_3', 'opp_xG_3', 'opp_ppda_att_4', 'opp_xG_4', 'opp_xpts_4', 'transfers_in', 'ict_index_2', 'opp_ppda_allowed_def_1', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'opp_xpts_1', 'opp_xG_2', 'opp_deep_1', 'team_npxG_2', 'team_ppda_att_2', 'team_ppda_allowed_def_2', 'team_ppda_def_2', 'team_elo', 'opp_npxG_1', 'team_deep_allowed_2', 'opp_ppda_att_2', 'opp_scored_3', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['influence_4', 'value', 'opp_ppda_def_1', 'team_npxG_4', 'opp_npxGA_3', 'opp_xG_3', 'opp_ppda_att_4', 'opp_xG_4', 'opp_xpts_4', 'transfers_in', 'ict_index_2', 'opp_ppda_allowed_def_1', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'opp_xpts_1', 'opp_xG_2', 'opp_deep_1', 'team_npxG_2', 'team_ppda_att_2', 'team_ppda_allowed_def_2', 'team_ppda_def_2', 'team_elo', 'opp_npxG_1', 'team_deep_allowed_2', 'opp_ppda_att_2', 'opp_scored_3', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['influence_4', 'value', 'opp_ppda_def_1', 'opp_npxGA_3', 'opp_xG_3', 'opp_ppda_att_4', 'opp_xG_4', 'opp_xpts_4', 'transfers_in', 'ict_index_2', 'opp_ppda_allowed_def_1', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'opp_xpts_1', 'opp_xG_2', 'opp_deep_1', 'team_npxG_2', 'team_ppda_att_2', 'team_ppda_allowed_def_2', 'team_ppda_def_2', 'team_elo', 'opp_npxG_1', 'team_deep_allowed_2', 'opp_ppda_att_2', 'opp_scored_3', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['value', 'opp_ppda_def_1', 'opp_npxGA_3', 'opp_xG_3', 'opp_ppda_att_4', 'opp_xG_4', 'opp_xpts_4', 'transfers_in', 'ict_index_2', 'opp_ppda_allowed_def_1', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'opp_xpts_1', 'opp_xG_2', 'opp_deep_1', 'team_npxG_2', 'team_ppda_att_2', 'team_ppda_allowed_def_2', 'team_ppda_def_2', 'team_elo', 'opp_npxG_1', 'team_deep_allowed_2', 'opp_ppda_att_2', 'opp_scored_3', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['value', 'opp_ppda_def_1', 'opp_npxGA_3', 'opp_xG_3', 'opp_ppda_att_4', 'opp_xG_4', 'transfers_in', 'ict_index_2', 'opp_ppda_allowed_def_1', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'opp_xpts_1', 'opp_xG_2', 'opp_deep_1', 'team_npxG_2', 'team_ppda_att_2', 'team_ppda_allowed_def_2', 'team_ppda_def_2', 'team_elo', 'opp_npxG_1', 'team_deep_allowed_2', 'opp_ppda_att_2', 'opp_scored_3', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['value', 'opp_ppda_def_1', 'opp_npxGA_3', 'opp_xG_3', 'opp_ppda_att_4', 'opp_xG_4', 'transfers_in', 'ict_index_2', 'opp_ppda_allowed_def_1', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'opp_xpts_1', 'opp_xG_2', 'team_npxG_2', 'team_ppda_att_2', 'team_ppda_allowed_def_2', 'team_ppda_def_2', 'team_elo', 'opp_npxG_1', 'team_deep_allowed_2', 'opp_ppda_att_2', 'opp_scored_3', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['value', 'opp_ppda_def_1', 'opp_npxGA_3', 'opp_xG_3', 'opp_xG_4', 'transfers_in', 'ict_index_2', 'opp_ppda_allowed_def_1', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'opp_xpts_1', 'opp_xG_2', 'team_npxG_2', 'team_ppda_att_2', 'team_ppda_allowed_def_2', 'team_ppda_def_2', 'team_elo', 'opp_npxG_1', 'team_deep_allowed_2', 'opp_ppda_att_2', 'opp_scored_3', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['value', 'opp_ppda_def_1', 'opp_npxGA_3', 'opp_xG_3', 'opp_xG_4', 'ict_index_2', 'opp_ppda_allowed_def_1', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'opp_xpts_1', 'opp_xG_2', 'team_npxG_2', 'team_ppda_att_2', 'team_ppda_allowed_def_2', 'team_ppda_def_2', 'team_elo', 'opp_npxG_1', 'team_deep_allowed_2', 'opp_ppda_att_2', 'opp_scored_3', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['value', 'opp_ppda_def_1', 'opp_npxGA_3', 'opp_xG_4', 'ict_index_2', 'opp_ppda_allowed_def_1', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'opp_xpts_1', 'opp_xG_2', 'team_npxG_2', 'team_ppda_att_2', 'team_ppda_allowed_def_2', 'team_ppda_def_2', 'team_elo', 'opp_npxG_1', 'team_deep_allowed_2', 'opp_ppda_att_2', 'opp_scored_3', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['value', 'opp_ppda_def_1', 'opp_npxGA_3', 'opp_xG_4', 'ict_index_2', 'opp_ppda_allowed_def_1', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'opp_xpts_1', 'opp_xG_2', 'team_npxG_2', 'team_ppda_allowed_def_2', 'team_ppda_def_2', 'team_elo', 'opp_npxG_1', 'team_deep_allowed_2', 'opp_ppda_att_2', 'opp_scored_3', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['value', 'opp_ppda_def_1', 'opp_xG_4', 'ict_index_2', 'opp_ppda_allowed_def_1', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'opp_xpts_1', 'opp_xG_2', 'team_npxG_2', 'team_ppda_allowed_def_2', 'team_ppda_def_2', 'team_elo', 'opp_npxG_1', 'team_deep_allowed_2', 'opp_ppda_att_2', 'opp_scored_3', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['opp_ppda_def_1', 'opp_xG_4', 'ict_index_2', 'opp_ppda_allowed_def_1', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'opp_xpts_1', 'opp_xG_2', 'team_npxG_2', 'team_ppda_allowed_def_2', 'team_ppda_def_2', 'team_elo', 'opp_npxG_1', 'team_deep_allowed_2', 'opp_ppda_att_2', 'opp_scored_3', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['opp_ppda_def_1', 'opp_xG_4', 'ict_index_2', 'opp_ppda_allowed_def_1', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'opp_xpts_1', 'opp_xG_2', 'team_npxG_2', 'team_ppda_allowed_def_2', 'team_ppda_def_2', 'team_elo', 'opp_npxG_1', 'opp_ppda_att_2', 'opp_scored_3', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['opp_ppda_def_1', 'opp_xG_4', 'ict_index_2', 'opp_ppda_allowed_def_1', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'opp_xpts_1', 'opp_xG_2', 'team_npxG_2', 'team_ppda_allowed_def_2', 'team_ppda_def_2', 'team_elo', 'opp_ppda_att_2', 'opp_scored_3', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['opp_ppda_def_1', 'opp_xG_4', 'ict_index_2', 'opp_ppda_allowed_def_1', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'opp_xpts_1', 'opp_xG_2', 'team_npxG_2', 'team_ppda_allowed_def_2', 'team_elo', 'opp_ppda_att_2', 'opp_scored_3', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['opp_ppda_def_1', 'opp_xG_4', 'ict_index_2', 'opp_ppda_allowed_def_1', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'opp_xpts_1', 'opp_xG_2', 'team_npxG_2', 'team_ppda_allowed_def_2', 'team_elo', 'opp_ppda_att_2', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['opp_xG_4', 'ict_index_2', 'opp_ppda_allowed_def_1', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'opp_xpts_1', 'opp_xG_2', 'team_npxG_2', 'team_ppda_allowed_def_2', 'team_elo', 'opp_ppda_att_2', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['opp_xG_4', 'ict_index_2', 'opp_ppda_allowed_def_1', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'opp_xpts_1', 'opp_xG_2', 'team_npxG_2', 'team_ppda_allowed_def_2', 'team_elo', 'opp_ppda_att_2', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['opp_xG_4', 'ict_index_2', 'opp_ppda_allowed_def_1', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'opp_xG_2', 'team_npxG_2', 'team_ppda_allowed_def_2', 'team_elo', 'opp_ppda_att_2', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['opp_xG_4', 'ict_index_2', 'opp_ppda_allowed_def_1', 'opp_ppda_allowed_def_4', 'opp_xG_2', 'team_npxG_2', 'team_ppda_allowed_def_2', 'team_elo', 'opp_ppda_att_2', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['opp_xG_4', 'ict_index_2', 'opp_ppda_allowed_def_4', 'opp_xG_2', 'team_npxG_2', 'team_ppda_allowed_def_2', 'team_elo', 'opp_ppda_att_2', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['opp_xG_4', 'ict_index_2', 'opp_xG_2', 'team_npxG_2', 'team_ppda_allowed_def_2', 'team_elo', 'opp_ppda_att_2', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['opp_xG_4', 'ict_index_2', 'opp_xG_2', 'team_npxG_2', 'team_elo', 'opp_ppda_att_2', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['ict_index_2', 'opp_xG_2', 'team_npxG_2', 'team_elo', 'opp_ppda_att_2', 'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['ict_index_2', 'opp_xG_2', 'team_npxG_2', 'team_elo', 'opp_ppda_att_2', 'opp_ppda_allowed_att_2', 'opp_elo', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['ict_index_2', 'opp_xG_2', 'team_npxG_2', 'team_elo', 'opp_ppda_att_2', 'opp_ppda_allowed_att_2', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['ict_index_2', 'opp_xG_2', 'team_npxG_2', 'opp_ppda_att_2', 'opp_ppda_allowed_att_2', 'team_xpts_4', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['ict_index_2', 'opp_xG_2', 'team_npxG_2', 'opp_ppda_att_2', 'opp_ppda_allowed_att_2', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['ict_index_2', 'opp_xG_2', 'team_npxG_2', 'opp_ppda_allowed_att_2', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['opp_xG_2', 'team_npxG_2', 'opp_ppda_allowed_att_2', 'opp_scored_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['opp_xG_2', 'team_npxG_2', 'opp_ppda_allowed_att_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['opp_xG_2', 'opp_ppda_allowed_att_2', 'total_points_2', 'opp_scored_1', 'total_points_1'] ['opp_xG_2', 'opp_ppda_allowed_att_2', 'opp_scored_1', 'total_points_1'] ['opp_xG_2', 'opp_ppda_allowed_att_2', 'total_points_1'] ['opp_xG_2', 'total_points_1'] ['opp_xG_2']
idx = np.linspace(n_vars, 1, n_vars)
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 8))
ax1.plot(idx, mae_varselect_cv)
ax1.set_xlabel("No. of variables")
ax1.set_ylabel("Validation MAE")
ax2.plot(idx, mse_varselect_cv)
ax2.set_xlabel("No. of variables")
ax2.set_ylabel("Validation MSE")
plt.show()
# Fit the model with 31 variables as the final random forest model
rf_vars_final = ['value', 'opp_ppda_def_1', 'opp_npxGA_3', 'opp_xG_3', 'opp_ppda_att_4', 'opp_xG_4', 'transfers_in',
'ict_index_2', 'opp_ppda_allowed_def_1', 'transfers_out_4', 'opp_ppda_allowed_def_4', 'opp_xpts_1',
'opp_xG_2', 'team_npxG_2', 'team_ppda_att_2', 'team_ppda_allowed_def_2', 'team_ppda_def_2',
'team_elo', 'opp_npxG_1', 'team_deep_allowed_2', 'opp_ppda_att_2', 'opp_scored_3',
'opp_ppda_allowed_att_2', 'opp_elo', 'bps_2', 'bps_1', 'team_xpts_4', 'opp_scored_2',
'total_points_2', 'opp_scored_1', 'total_points_1']
X_train_rf = X_train[rf_vars_final]
rf_regr_final = RandomForestRegressor(max_depth=20, random_state=0, oob_score=True, n_estimators=n_estimators_final)
rf_regr_final.fit(X_train_rf, y_train)
RandomForestRegressor(max_depth=20, n_estimators=1000, oob_score=True,
random_state=0)
rf_predict_final = rf_regr_final.predict(X_test[rf_vars_final])
rf_resid_final = rf_predict_final - y_test
mae_test = np.mean(abs(rf_resid_final))
mse_test = np.mean(rf_resid_final**2)
print("MAE on test set = {:.3f}\nMSE on test set = {:.3f}".format(mae_test, mse_test))
MAE on test set = 1.297 MSE on test set = 4.164